diff --git a/ci/deps/actions-37.yaml b/ci/deps/actions-37.yaml index 61f431256dd4a..9292e2aa7db39 100644 --- a/ci/deps/actions-37.yaml +++ b/ci/deps/actions-37.yaml @@ -25,4 +25,5 @@ dependencies: - flask - tabulate - pyreadstat + - pyreadr - pip diff --git a/ci/deps/azure-macos-37.yaml b/ci/deps/azure-macos-37.yaml index 8c8b49ff3df5b..1812fd16e3668 100644 --- a/ci/deps/azure-macos-37.yaml +++ b/ci/deps/azure-macos-37.yaml @@ -33,4 +33,5 @@ dependencies: - pip: - cython>=0.29.21 - pyreadstat + - pyreadr - pyxlsb diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index c9d22ffbead45..6e7be62cdc56f 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -37,6 +37,7 @@ dependencies: - xlsxwriter - xlwt - pyreadstat + - pyreadr - pip - pip: - pyxlsb diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index b6351ac2232ff..08300ab839d95 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -360,6 +360,7 @@ zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing pyarrow 0.15.0 Parquet, ORC, and feather reading / writing pyreadstat SPSS files (.sav) reading +pyreadr R files (.RData, .rda, .rds) reading / writing ========================= ================== ============================================================= Access data in the cloud diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 3b7a6037a9715..f4bbde8efcd92 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -31,6 +31,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` binary;`ORC Format `__;:ref:`read_orc`; binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` + binary;`R `__;:ref:`read_rdata`;:ref:`to_rdata` binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` binary;`SAS `__;:ref:`read_sas`; binary;`SPSS 
`__;:ref:`read_spss`; @@ -5903,6 +5904,289 @@ respective functions from ``pandas-gbq``. Full documentation can be found `here `__. + +.. _io.rdata: + +R data format +------------- + +.. _io.rdata_reader: + +Reading R data +'''''''''''''' + +.. versionadded:: 1.3.0 + +The top-level function ``read_rdata`` will read the native serialization types +in the R language and environment. For .RData and its synonymous shorthand, .rda, +that can hold multiple R objects, method will return a ``dict`` of ``DataFrames``. +For .rds types that only contain a single R object, method will return a single +``DataFrame``. + +.. note:: + + Since any R object can be saved in these types, this method will only return + data.frame objects or objects coercible to data.frames including matrices, + tibbles, and data.tables and to some extent, arrays. + +For example, consider the following generated data.frames in R using environment +data samples from US EPA, UK BGCI, and NOAA public data: + +.. code-block:: r + + ghg_df <- data.frame( + gas = c("Carbon dioxide", "Methane", "Nitrous oxide", + "Fluorinated gases", "Total"), + year = c(2018, 2018, 2018, 2018, 2018), + emissions = c(5424.88150213288, 634.457127078267, 434.528555376666, + 182.782432461777, 6676.64961704959), + row.names = c(141:145), + stringsAsFactors = FALSE + ) + + saveRDS(ghg_df, file="ghg_df.rds") + + plants_df <- data.frame( + plant_group = c("Pteridophytes", "Pteridophytes", "Pteridophytes", + "Pteridophytes", "Pteridophytes"), + status = c("Data Deficient", "Extinct", "Not Threatened", + "Possibly Threatened", "Threatened"), + count = c(398, 65, 1294, 408, 1275), + row.names = c(16:20), + stringsAsFactors = FALSE + ) + + saveRDS(plants_df, file="plants_df.rds") + + sea_ice_df <- data.frame( + year = c(2016, 2017, 2018, 2019, 2020), + mo = c(12, 12, 12, 12, 12), + data.type = c("Goddard", "Goddard", "Goddard", "Goddard", "NRTSI-G"), + region = c("S", "S", "S", "S", "S"), + extent = c(8.28, 9.48, 9.19, 9.41, 10.44), 
+ area = c(5.51, 6.23, 5.59, 6.59, 6.5), + row.names = c(1012:1016), + stringsAsFactors = FALSE + ) + + saveRDS(sea_ice_df, file="sea_ice_df.rds") + + save(ghg_df, plants_df, sea_ice_df, file="env_data_dfs.rda") + +With ``read_rdata``, you can read these above .rds or .rda files: + +.. ipython:: python + :suppress: + + rel_path = os.path.join("..", "pandas", "tests", "io", "data", "rdata") + file_path = os.path.abspath(rel_path) + +.. ipython:: python + + rds_file = os.path.join(file_path, "ghg_df.rds") + ghg_df = pd.read_rdata(rds_file).tail() + ghg_df + + rda_file = os.path.join(file_path, "env_data_dfs.rda") + env_dfs = pd.read_rdata(rda_file) + {k: df.tail() for k, df in env_dfs.items()} + +To ignore the rownames of data.frame, use option ``rownames=False``: + +.. ipython:: python + + rds_file = os.path.join(file_path, "plants_df.rds") + plants_df = pd.read_rdata(rds_file, rownames=False).tail() + plants_df + + +To select specific objects in .rda, pass a list of names into ``select_frames``: + +.. ipython:: python + + rda_file = os.path.join(file_path, "env_data_dfs.rda") + env_dfs = pd.read_rdata(rda_file, select_frames=["sea_ice_df"]) + env_dfs + +To read from a file-like object, read object in argument, ``path_or_buffer``: + +.. ipython:: python + + rds_file = os.path.join(file_path, "plants_df.rds") + with open(rds_file, "rb") as f: + plants_df = pd.read_rdata(f.read(), file_format="rds") + + plants_df + +To read from URL, pass link directly into method: + +.. ipython:: python + + url = ("https://github.com/hadley/nycflights13/" + "blob/master/data/airlines.rda?raw=true") + + airlines = pd.read_rdata(url, file_format="rda") + airlines + +To read from an Amazon S3 bucket, point to the storage path. This also raises +another issue. Any R data encoded in non utf-8 is currently not supported: + +.. code-block:: ipython + + In [608]: ghcran = pd.read_rdata("s3://public-r-data/ghcran.Rdata") + ... 
+ UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 45: invalid continuation byte + +Also, remember if R data files do not contain any data frame object, a parsing error +will occur: + +.. code-block:: ipython + + In [608]: rds_file = os.path.join(file_path, "env_data_non_dfs.rda") + ... + LibrdataError: Invalid file, or file has unsupported features + + +.. _io.rdata_writer: + +Please note R's ``Date`` (without time component) will translate to ``object`` type +in pandas. Also, R's date/time field type, ``POSIXct``, will translate to UTC time +in pandas. + +.. ipython:: python + + ppm_df = pd.read_rdata(os.path.join(file_path, "ppm_df.rds")) + ppm_df.head() + ppm_df.tail() + ppm_df.dtypes + +Writing R data +'''''''''''''' + +.. versionadded:: 1.3.0 + +The method :func:`~pandas.core.frame.DataFrame.to_rdata` will write a DataFrame +or multiple DataFrames into R data files (.RData, .rda, and .rds). + +For a single DataFrame in rds type, pass in a file or buffer in method: + +.. ipython:: python + + plants_df.to_rdata("plants_df.rds") + +For a single DataFrame in RData or rda types, pass in a file or buffer in method +and optionally give it a name: + +.. ipython:: python + + ghg_df.to_rdata("ghg_df.rda", rda_name="ghg_df") + +While RData and rda types can hold multiple R objects, this method currently +only supports writing out a single DataFrame. + +Even write to a buffer and read its content: + +.. ipython:: python + + with BytesIO() as b_io: + env_dfs["sea_ice_df"].to_rdata(b_io, file_format="rda", index=False) + print( + pd.read_rdata( + b_io.getvalue(), + file_format="rda", + rownames=False, + )["pandas_dataframe"].tail() + ) + +While DataFrame index will not map into R rownames, by default ``index=True`` +will output as a named column or multiple columns for MultiIndex. + +.. ipython:: python + + ghg_df.rename_axis(None).to_rdata("ghg_df.rds") + + pd.read_rdata("ghg_df.rds").tail() + +To ignore the index, use ``index=False``: + +.. 
ipython:: python + + ghg_df.rename_axis(None).to_rdata("ghg_df.rds", index=False) + + pd.read_rdata("ghg_df.rds").tail() + +By default, these R serialized types are compressed files in either gzip, bzip2, +or xz algorithms. Similarly to R, the default type in this method is "gzip" or +"gz". Notice difference of compressed and uncompressed files + +.. ipython:: python + + plants_df.to_rdata("plants_df_gz.rds") + plants_df.to_rdata("plants_df_bz2.rds", compression="bz2") + plants_df.to_rdata("plants_df_xz.rds", compression="xz") + plants_df.to_rdata("plants_df_non_comp.rds", compression=None) + + os.stat("plants_df_gz.rds").st_size + os.stat("plants_df_bz2.rds").st_size + os.stat("plants_df_xz.rds").st_size + os.stat("plants_df_non_comp.rds").st_size + +Like other IO methods, ``storage_options`` are enabled to write to those platforms: + +.. code-block:: ipython + + ghg_df.to_rdata( + "s3://path/to/my/storage/pandas_df.rda", + storage_options={"user": "xxx", "password": "???"} + ) + +.. ipython:: python + :suppress: + + os.remove("ghg_df.rds") + os.remove("ghg_df.rda") + os.remove("plants_df.rds") + os.remove("plants_df_gz.rds") + os.remove("plants_df_bz2.rds") + os.remove("plants_df_xz.rds") + os.remove("plants_df_non_comp.rds") + +Once exported, the single DataFrame can be read back in R or multiple DataFrames +loaded in R: + +.. 
code-block:: r + + plants_df <- readRDS("plants_df.rds") + plants_df + plant_group status count + 16 Pteridophytes Data Deficient 398 + 17 Pteridophytes Extinct 65 + 18 Pteridophytes Not Threatened 1294 + 19 Pteridophytes Possibly Threatened 408 + 20 Pteridophytes Threatened 1275 + + load("ghg_df.rda") + + mget(list=ls()) + $ghg_df + gas year emissions + 141 Carbon dioxide 2018 5424.8815 + 142 Methane 2018 634.4571 + 143 Nitrous oxide 2018 434.5286 + 144 Fluorinated gases 2018 182.7824 + 145 Total 2018 6676.6496 + +For more information of the underlying ``pyreadr`` package, see main page of +`pyreadr`_ for further notes on support and limitations. For more information of R +serialization data types, see docs on `rds`_ and `rda`_ data files. + +.. _pyreadr: https://github.com/ofajardo/pyreadr + +.. _rds: https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/readRDS + +.. _rda: https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/save + + .. _io.stata: Stata format @@ -5958,6 +6242,7 @@ outside of this range, the variable is cast to ``int16``. 115 dta file format. Attempting to write *Stata* dta files with strings longer than 244 characters raises a ``ValueError``. + .. _io.stata_reader: Reading from Stata format diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 85d9acff353be..dc5a87f39e1b7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -110,6 +110,107 @@ both XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`) For more, see :ref:`io.xml` in the user guide on IO tools. +.. _whatsnew_130.read_to_rdata: + +Read and write R data files +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We added I/O support to read and write R data files (.RData, .rda, .rds) using +:func:`pandas.read_rdata` and :meth:`DataFrame.to_rdata`. Both methods rely on +the `pyreadr`_ package to support open source data migration between R and +Python pandas. (:issue:`40287`) + +.. 
_pyreadr: https://github.com/ofajardo/pyreadr + +For example, consider the below generated data frame and matrix in R: + +.. code-block:: r + + In [1]: carbon_ppm_df <- data.frame( + ...: year = c(2020, 2020, 2020, 2021, 2021), + ...: month = c(10, 11, 12, 1, 2), + ...: monthly_average = c(411.51, 413.11, 414.25, 415.52, 416.75), + ...: num_days = c(30, 27, 30, 29, 28), + ...: st_dev_of_days = c(0.22, 0.8, 0.48, 0.44, 1.01), + ...: unc_mon_mean = c(0.08, 0.29, 0.17, 0.16, 0.36) + ...: ) + + In [2]: iucn_species_mtx <- matrix( + ...: c(102, 79, 159, 63, 30, 13, 267, 35, 85, + ...: 30, 10, 5, 1, 2, 7, 14, 2, 2, + ...: 409, 121, 22, 75, 40, 78, 134, 146, 28, + ...: 29, 6, 0, 0, 0, 12, 2, 1, 0, + ...: 3770, 627, 223, 365, 332, 699, 604, 663, 225, + ...: 6972, 989, 460, 730, 588, 1302, 518, 1060, 542, + ...: 7089, 1219, 798, 831, 538, 1051, 975, 719, 556, + ...: 2990, 4251, 52, 2819, 1220, 914, 1648, 1184, 845, + ...: 43885, 20685, 11158, 10865, 8492, 8192, 7326, 7212, 5940 + ...: ), + ...: ncol=9, nrow=9, + ...: dimnames = list( + ...: c("MAGNOLIOPSIDA", "ACTINOPTERYGII", "AVES", + ...: "INSECTA", "REPTILIA", "LILIOPSIDA", + ...: "GASTROPODA", "AMPHIBIA", "MAMMALIA"), + ...: c("EX", "EW", "CR(PE)", "CR(PEW)", "CR", + ...: "EN", "VU", "DD", "Total") + ...: ) + ...: ) + + In [3]: saveRDS(carbon_ppm_df, "ppm_df_r.rds") + In [4]: save(carbon_ppm_df, iucn_species_mtx, file="env_objs_r.rda") + +Now, both R data files can be read in pandas to return either DataFrame +for .rds types or ``dict`` of DataFrames for .RData and .rda types: + +.. 
code-block:: ipython + + In [1]: ppm_df = pd.read_rdata("ppm_df_r.rds") + In [2]: ppm_df + Out[3]: + year month monthly_average num_days st_dev_of_days unc_mon_mean + 0 2020 10 411.51 30 0.22 0.08 + 1 2020 11 413.11 27 0.80 0.29 + 2 2020 12 414.25 30 0.48 0.17 + 3 2021 1 415.52 29 0.44 0.16 + 4 2021 2 416.75 28 1.01 0.36 + + In [4]: env_objs = pd.read_rdata("env_objs_r.rda") + Out[5]: + {'carbon_ppm_df': + year month monthly_average num_days st_dev_of_days unc_mon_mean + 0 2020 10 411.51 30 0.22 0.08 + 1 2020 11 413.11 27 0.80 0.29 + 2 2020 12 414.25 30 0.48 0.17 + 3 2021 1 415.52 29 0.44 0.16 + 4 2021 2 416.75 28 1.01 0.36 + + [5 rows x 6 columns], + 'iucn_species_mtx': + EX EW CR(PE) CR(PEW) CR EN VU DD Total + rownames + MAGNOLIOPSIDA 102 30 409 29 3770 6972 7089 2990 43885 + ACTINOPTERYGII 79 10 121 6 627 989 1219 4251 20685 + AVES 159 5 22 0 223 460 798 52 11158 + INSECTA 63 1 75 0 365 730 831 2819 10865 + REPTILIA 30 2 40 0 332 588 538 1220 8492 + LILIOPSIDA 13 7 78 12 699 1302 1051 914 8192 + GASTROPODA 267 14 134 2 604 518 975 1648 7326 + AMPHIBIA 35 2 146 1 663 1060 719 1184 7212 + + [8 rows x 9 columns]} + +Additionally, pandas data can be written back out into the same R data files: + +.. code-block:: ipython + + In [5]: ppm_df.to_rdata("ppm_df_py.rds") + In [6]: env_objs['iucn_species_mtx'].to_rdata( + ...: "iucn_species_py.rda", + ...: rda_name="iucn_species_df" + ...: ) + +For more, see :ref:`io.rdata` in the user guide on IO tools. + Styler Upgrades ^^^^^^^^^^^^^^^ @@ -178,7 +279,6 @@ For example: df df.rolling("2D", center=True).mean() - .. 
_whatsnew_130.enhancements.other: Other enhancements diff --git a/environment.yml b/environment.yml index 146bf6db08d8b..02bb4c929365e 100644 --- a/environment.yml +++ b/environment.yml @@ -112,6 +112,7 @@ dependencies: - xarray # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss + - pyreadr # pandas.read_rdata, DataFrame.to_rdata - tabulate>=0.8.3 # DataFrame.to_markdown - natsort # DataFrame.sort_values - pip: diff --git a/pandas/__init__.py b/pandas/__init__.py index db4043686bcbb..498696938d079 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -171,6 +171,7 @@ read_stata, read_sas, read_spss, + read_rdata, ) from pandas.io.json import _json_normalize as json_normalize diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 38766d2856cfe..15220093493e2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2290,6 +2290,132 @@ def _from_arrays( ) return cls(mgr) + @doc(storage_options=generic._shared_docs["storage_options"]) + def to_rdata( + self, + path_or_buffer: FilePathOrBuffer, + file_format: str = "infer", + rda_name: str = "pandas_dataframe", + index: bool = True, + compression: CompressionOptions = "gzip", + storage_options: StorageOptions = None, + ) -> None: + """ + Render one or more DataFrames to R data (.RData, .rda, .rds). + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : a valid str, path object or file-like object + Any valid string path is acceptable. + + file_format : {{'infer', 'rda', 'rdata', 'rds'}}, default 'infer' + R serialization type generated from native commands: base::save + (that saves multiple objects) or base::saveRDS (that saves a + single object to disk). Default 'infer' will use extension in file + name to determine the format type. + + rda_name : str, default "pandas_dataframe" + Name for R data.frame in RData/rda file. + + index : bool, default True + Include index or MulitIndex in output as separate columns. 
Since + DataFrame indexes can include multiple columns and R rownames can + only include one column, DataFrame index will not map to R data.frame + rownames. + + compression : {{'gzip', 'bz2', 'xz', None}}, default 'gzip' + Compression type applied to the output R data file. + + {storage_options} + + Returns + ------- + None + Either None for successful output or raises an error. + + See Also + -------- + to_stata : Convert DataFrame to a Stata dataset. + to_parquet : Convert DataFrame to parquet format. + to_feather : Convert DataFrame to feather format. + + Examples + -------- + To save an .rds file which only contains a single DataFrame: + + >>> ghg_df = pd.DataFrame( + ... {{'gas': ['Carbon dioxide', 'Methane', + ... 'Nitrous oxide', + ... 'Fluorinated gases', + ... 'Total'], + ... 'year': [2018, 2018, 2018, 2018, 2018], + ... 'emissions': [5424.88, 634.46, 434.53, + ... 182.78, 6676.65] + ... }}) + >>> ghg_df.to_rdata("ghg_df.rds") # doctest: +SKIP + + >>> R_code = ''' + ... ghg_df <- readRDS("ghg_df.rds") + ... ghg_df + ... index gas year emissions + ... 1 0 Carbon dioxide 2018 5424.88 + ... 2 1 Methane 2018 634.46 + ... 3 2 Nitrous oxide 2018 434.53 + ... 4 3 Fluorinated gases 2018 182.78 + ... 5 4 Total 2018 6676.65 + ... ''' + + To save an .rda or .RData file: + + >>> plants_df = pd.DataFrame( + ... {{'plant_group': ['Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes'], + ... 'status': ['Data Deficient', + ... 'Extinct', + ... 'Not Threatened', + ... 'Possibly Threatened', + ... 'Threatened'], + ... 'count': [398, 65, 1294, 408, 1275] + ... }}) + >>> plants_df.to_rdata( + ... "plants_df.rda", + ... rda_name="plants_df", + ... ) # doctest: +SKIP + + >>> R_code = ''' + ... load("plants_df.rda") + ... + ... mget(ls()) + ... $plants_df + ... index plant_group status count + ... 1 0 Pteridophytes Data Deficient 398 + ... 2 1 Pteridophytes Extinct 65 + ... 3 2 Pteridophytes Not Threatened 1294 + ... 
4 3 Pteridophytes Possibly Threatened 408 + ... 5 4 Pteridophytes Threatened 1275 + ... ''' + """ + from pandas.io.rdata import PyReadrWriter + + import_optional_dependency("pyreadr") + + rdata_writer = PyReadrWriter( + self, + path_or_buffer=path_or_buffer, + file_format=file_format, + rda_name=rda_name, + index=index, + compression=compression, + storage_options=storage_options, + ) + + return rdata_writer.write_data() + @doc(storage_options=generic._shared_docs["storage_options"]) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_stata( diff --git a/pandas/io/api.py b/pandas/io/api.py index 5926f2166ee9d..9cacb014e7dd0 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -29,6 +29,7 @@ HDFStore, read_hdf, ) +from pandas.io.rdata import read_rdata from pandas.io.sas import read_sas from pandas.io.spss import read_spss from pandas.io.sql import ( diff --git a/pandas/io/rdata.py b/pandas/io/rdata.py new file mode 100644 index 0000000000000..4114b6d1f8349 --- /dev/null +++ b/pandas/io/rdata.py @@ -0,0 +1,598 @@ +import io +import os +from tempfile import TemporaryDirectory +from typing import ( + Dict, + List, + Optional, + Union, +) + +from pandas._typing import ( + Buffer, + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError +from pandas.util._decorators import doc + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.frame import DataFrame +from pandas.core.shared_docs import _shared_docs + +from pandas.io.common import ( + file_exists, + get_handle, + is_fsspec_url, + is_url, + stringify_path, +) + + +@doc(storage_options=_shared_docs["storage_options"]) +def read_rdata( + path_or_buffer: FilePathOrBuffer, + file_format: str = "infer", + select_frames: Optional[List[str]] = None, + rownames: bool = True, + storage_options: StorageOptions = None, +) -> Union[DataFrame, Dict[str, DataFrame]]: + r""" + Read 
R data (.RData, .rda, .rds) into DataFrame or ``dict`` of DataFrames. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object, or file-like object + Any valid file path is acceptable. The string could be a URL. + Valid URL schemes include http, ftp, s3, and file. + + file_format : {{'infer', 'rdata', 'rda', 'rds'}}, default 'infer' + R serialization type as output from R's base::save or base::saveRDS + commands. Default 'infer' will use extension in file name to + determine the format type. + + select_frames : list, default None + Selected names of DataFrames to return from R rda and RData types that + can contain multiple objects. + + rownames : bool, default True + Include original rownames in R data frames to map into a DataFrame index. + + {storage_options} + + Returns + ------- + DataFrame or dict of DataFrames + Depends on R data type where rds formats return a single DataFrame and + rda or RData formats return ``dict`` of DataFrames. + + See Also + -------- + read_sas : Read SAS datasets into DataFrame. + read_stata : Read Stata datasets into DataFrame. + read_spss : Read SPSS datasets into DataFrame. + + Notes + ----- + Any R data file that contains a non-data.frame object may raise parsing errors. + Method will return data.frame, matrix, and data.frame-like objects such as + tibbles and data.tables. + + For ``pyreadr`` engine, ``select_frames`` above is synonymous to ``use_objects`` + in package's `read_r` method. Also, ``timezone`` argument defaults to current + system regional timezone in order to correspond to original date/times in R. + + Examples + -------- + For an .rds file which only contains a single R object, method returns a + DataFrame: + + >>> R_code = ''' + ... ghg_df <- data.frame( + ... gas = c('Carbon dioxide', + ... 'Methane', + ... 'Nitrous oxide', + ... 'Fluorinated gases', + ... 'Total'), + ... year = c(2018, + ... 2018, + ... 2018, + ... 2018, + ... 2018), + ... emissions = c(5424.88, + ... 
634.46, + ... 434.53, + ... 182.78, + ... 6676.65) + ... ) + ... saveRDS(ghg_df, file="ghg_df.rds") + ... ''' + + >>> ghg_df = pd.read_rdata("ghg_df.rds") # doctest: +SKIP + >>> ghg_df # doctest: +SKIP + gas year emissions + rownames + 1 Carbon dioxide 2018 5424.88 + 2 Methane 2018 634.46 + 3 Nitrous oxide 2018 434.53 + 4 Fluorinated gases 2018 182.78 + 5 Total 2018 6676.65 + + For an .RData or .rda file which can contain multiple R objects, method + returns a ``dict`` of DataFrames: + + >>> R_code = ''' + ... plants_df <- data.frame( + ... plant_group = c('Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes', + ... 'Pteridophytes'), + ... status = c('Data Deficient', + ... 'Extinct', + ... 'Not Threatened', + ... 'Possibly Threatened', + ... 'Threatened'), + ... count = c(398, 65, 1294, 408, 1275) + ... ) + ... sea_ice_df <- data.frame( + ... year = c(2016, 2017, 2018, 2019, 2020), + ... mo = c(12, 12, 12, 12, 12), + ... data.type = c('Goddard', + ... 'Goddard', + ... 'Goddard', + ... 'Goddard', + ... 'NRTSI-G'), + ... region = c('S', 'S', 'S', 'S', 'S'), + ... extent = c(8.28, 9.48, 9.19, 9.41, 10.44), + ... area = c(5.51, 6.23, 5.59, 6.59, 6.5) + ... ) + ... save(ghg_df, plants_df, sea_ice_df, file="env_data_dfs.rda") + ... 
''' + + >>> env_dfs = pd.read_rdata("env_data_dfs.rda") # doctest: +SKIP + >>> env_dfs # doctest: +SKIP + {{'ghg_df': + gas year emissions + rownames + 1 Carbon dioxide 2018 5424.88 + 2 Methane 2018 634.46 + 3 Nitrous oxide 2018 434.53 + 4 Fluorinated gases 2018 182.79 + 5 Total 2018 6676.65, + 'plants_df': + plant_group status count + rownames + 1 Pteridophytes Data Deficient 398 + 2 Pteridophytes Extinct 65 + 3 Pteridophytes Not Threatened 1294 + 4 Pteridophytes Possibly Threatened 408 + 5 Pteridophytes Threatened 1275, + 'sea_ice_df': + year mo data.type region extent area + rownames + 1 2016 12 Goddard S 8.28 5.51 + 2 2017 12 Goddard S 9.48 6.23 + 3 2018 12 Goddard S 9.19 5.59 + 4 2019 12 Goddard S 9.41 6.59 + 5 2020 12 NRTSI-G S 10.44 6.50}} + """ + + import_optional_dependency("pyreadr") + + rdr = _PyReadrParser( + path_or_buffer, + file_format, + select_frames, + rownames, + storage_options, + ) + + return rdr.parse_data() + + +def _get_data_from_filepath( + filepath_or_buffer, + encoding, + compression, + storage_options, +) -> Union[str, bytes, Buffer]: + """ + Extract raw R data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, BytesIO) + 3. R data file in ascii or binary content + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. 
+ """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if ( + not isinstance(filepath_or_buffer, str) + or is_url(filepath_or_buffer) + or is_fsspec_url(filepath_or_buffer) + or file_exists(filepath_or_buffer) + ): + with get_handle( + filepath_or_buffer, + "rb", + encoding=encoding, + compression=compression, + storage_options=storage_options, + is_text=False, + ) as handle_obj: + filepath_or_buffer = ( + handle_obj.handle.read() + if hasattr(handle_obj.handle, "read") + else handle_obj.handle + ) + else: + raise FileNotFoundError(f"{filepath_or_buffer} file cannot be found.") + + return filepath_or_buffer + + +def _preprocess_data(data) -> Union[io.StringIO, io.BytesIO]: + """ + Convert extracted raw data. + + This method will return underlying data of extracted R data formats. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is bytes that represents the R data. + """ + + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + + +class _RDataReader: + """ + Internal subclass to parse R data files into dict of DataFrames. + + Parameters + ---------- + path_or_buffer : a valid str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. + + file_format : {{'infer', 'rdata', 'rda', 'rds'}}, default 'infer' + R serialization type. + + select_frames : list, default None + Selected names of DataFrames to return from R data. + + rownames : bool, default True + Include original rownames in R data frames. + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. 
host, port, username, password, etc., + + See also + -------- + pandas.io.rdata._PyReadrParser + + Notes + ----- + To subclass this class effectively you must override the following methods:` + * :func:`handle_rownames` + * :func:`parse_data` + + See each method's respective documentation for details on their + functionality. + """ + + def __init__( + self, + path_or_buffer, + file_format, + select_frames, + rownames, + storage_options, + ) -> None: + self.path_or_buffer = path_or_buffer + self.file_format = file_format.lower() + self.select_frames = select_frames + self.rownames = rownames + self.storage_options = storage_options + + def verify_params(self) -> None: + """ + Verify user entries of parameters. + + This method will check the values and types of select parameters + and raise appropriate errors. + """ + + path_ext: Optional[str] = ( + os.path.splitext(self.path_or_buffer.lower())[1][1:] + if isinstance(self.path_or_buffer, str) + else None + ) + + if self.file_format not in ["infer", "rdata", "rda", "rds"]: + raise ValueError( + f"'{self.file_format}' is not a valid value for file_format" + ) + + if ( + self.file_format == "infer" + and isinstance(self.path_or_buffer, str) + and path_ext not in ["rdata", "rda", "rds"] + ) or (self.file_format == "infer" and not isinstance(self.path_or_buffer, str)): + raise ValueError( + f"Unable to infer file format from file name: {self.path_or_buffer}. " + "Please use known R data type (rdata, rda, rds)." + ) + + if self.file_format == "infer" and isinstance(path_ext, str): + self.file_format = path_ext + + if self.select_frames is not None and not is_list_like(self.select_frames): + raise TypeError( + f"{type(self.select_frames).__name__} is " + "not a valid type for select_frames" + ) + + def buffer_to_disk(self, tmp_dir: str) -> str: + """ + Convert path or buffer to disk file. + + This method will convert path_or_buffer to temp file + for pyreadr to parse from disk. 
+ """ + + r_temp = os.path.join(tmp_dir, "rdata.rda") + + handle_data = _get_data_from_filepath( + filepath_or_buffer=self.path_or_buffer, + encoding="utf-8", + compression=None, + storage_options=self.storage_options, + ) + + with _preprocess_data(handle_data) as r_data: + if isinstance(r_data, io.BytesIO): + with open(r_temp, "wb") as f: + f.write(r_data.read()) + + return r_temp + + def handle_row_names(self) -> DataFrame: + """ + Migrate R rownames to DataFrame index. + + This method will conditionally adjust index to reflect + original R rownames. + """ + + raise AbstractMethodError(self) + + def parse_data(self) -> Union[DataFrame, Dict[str, DataFrame]]: + """ + Parse R data files. + + This method will run engine methods to return a single DataFrame + for rds type or dictionary of DataFrames for RData or rda types. + """ + + raise AbstractMethodError(self) + + +class _PyReadrParser(_RDataReader): + """ + Internal class to parse R data types using third-party + package, pyreadr. + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.verify_params() + + def handle_rownames(self, df) -> DataFrame: + if not self.rownames: + df = df.reset_index(drop=True) + df.index.name = None + + if self.rownames and df.index.name != "rownames": + df.index.name = "rownames" + if df.index[0] == 0: + df.index += 1 + + return df + + def parse_data(self) -> Union[DataFrame, Dict[str, DataFrame]]: + from pyreadr import read_r + + with TemporaryDirectory() as tmp_dir: + r_temp = self.buffer_to_disk(tmp_dir) + rdata = read_r(r_temp, use_objects=self.select_frames) + + rdata = {k: self.handle_rownames(df) for k, df in rdata.items()} + rdata = rdata[None] if self.file_format == "rds" else dict(rdata) + + return rdata + + +class RDataWriter: + """ + Subclass to write pandas DataFrames into R data files. + + Parameters + ---------- + path_or_buffer : a valid str, path object or file-like object + Any valid string path is acceptable. 
class RDataWriter:
    """
    Base class to write pandas DataFrames into R data files.

    Parameters
    ----------
    frame : DataFrame
        Object to be written.

    path_or_buffer : a valid str, path object or file-like object
        Any valid string path is acceptable.

    file_format : {'infer', 'rdata', 'rda', 'rds'}, default 'infer'
        R serialization type. With 'infer' the type is taken from the
        file extension when ``path_or_buffer`` is a str or path object.

    rda_name : str, default "pandas_dataframe"
        Name for exported DataFrame in rda file.

    index : bool, default True
        Include index or MultiIndex in output as separate columns.

    compression : {'gzip', 'bz2', 'xz', None}, default 'gzip'
        Compression type applied to the written output.

    storage_options : dict, optional
        Extra options that make sense for a particular storage connection,
        e.g. host, port, username, password, etc.

    See Also
    --------
    pandas.io.rdata.PyReadrWriter

    Notes
    -----
    To subclass this class effectively you must override the following methods:
        * :func:`write_data`

    See each method's respective documentation for details on their
    functionality.
    """

    def __init__(
        self,
        frame: DataFrame,
        path_or_buffer: FilePathOrBuffer,
        file_format: str = "infer",
        rda_name: str = "pandas_dataframe",
        index: bool = True,
        compression: CompressionOptions = "gzip",
        storage_options: StorageOptions = None,
    ) -> None:
        self.frame = frame
        self.path_or_buffer = path_or_buffer
        # Normalized once so later comparisons are case-insensitive.
        self.file_format = file_format.lower()
        self.rda_name = rda_name
        self.index = index
        self.compression = compression
        self.storage_options = storage_options

    def verify_params(self) -> None:
        """
        Verify user entries of parameters.

        This method will check the values and types of select parameters
        and raise appropriate errors. When ``file_format`` is ``"infer"``
        and ``path_or_buffer`` is a str or path object, ``file_format`` is
        replaced by the lower-cased file extension.

        Raises
        ------
        ValueError
            * If ``file_format`` is not one of infer, rdata, rda, rds.
            * If ``file_format`` is ``"infer"`` and the file extension is
              not one of rdata, rda, rds.
            * If ``compression`` is not one of gzip, bz2, xz or None.
        """

        known_formats = ("rdata", "rda", "rds")

        # Path objects are accepted input, so look at their extension the
        # same way as for plain strings. Buffers have no name to inspect.
        path_ext: Optional[str] = None
        if isinstance(self.path_or_buffer, (str, os.PathLike)):
            path_name = os.fsdecode(self.path_or_buffer).lower()
            path_ext = os.path.splitext(path_name)[1][1:]

        if self.file_format not in ("infer", *known_formats):
            raise ValueError(
                f"{self.file_format} is not a valid value for file_format."
            )

        if self.file_format == "infer" and path_ext is not None:
            if path_ext not in known_formats:
                raise ValueError(
                    "Unable to infer file format from file name: "
                    f"{self.path_or_buffer}. "
                    "Please use known R data type (rdata, rda, rds)."
                )
            self.file_format = path_ext

        if self.compression is not None and self.compression not in [
            "gzip",
            "bz2",
            "xz",
        ]:
            raise ValueError(
                f"{self.compression} is not a supported value for compression."
            )

    def disk_to_buffer(self, r_file: str) -> None:
        """
        Save temp file to path or buffer.

        This method will copy the bytes of the R data file at ``r_file``
        to ``path_or_buffer``, applying ``compression`` and
        ``storage_options`` through ``get_handle``.

        Parameters
        ----------
        r_file : str
            Path of the already written R data file on disk.
        """

        with open(r_file, "rb") as rdata:
            with get_handle(
                self.path_or_buffer,
                "wb",
                compression=self.compression,
                storage_options=self.storage_options,
                is_text=False,
            ) as handles:
                handles.handle.write(rdata.read())  # type: ignore[arg-type]

        return None

    def write_data(self) -> None:
        """
        Write DataFrames to R data files.

        This method will run engine methods to export DataFrames
        to R data files. Subclasses must override it.

        Raises
        ------
        AbstractMethodError
            Always, in this base class.
        """

        raise AbstractMethodError(self)
class PyReadrWriter(RDataWriter):
    """
    Main class called in `pandas.core.frame` to write DataFrame to R
    data types using third-party package, pyreadr.

    The constructor forwards all arguments to the parent class and then
    calls ``verify_params`` so that invalid arguments are rejected before
    anything is written.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.verify_params()

    def write_data(self) -> None:
        """
        Write the DataFrame to an R data file with pyreadr.

        The frame is written uncompressed to a temporary file by pyreadr
        and then handed to ``disk_to_buffer`` for delivery to
        ``path_or_buffer``.

        Raises
        ------
        ValueError
            If ``file_format`` is not one of rdata, rda, rds at write time,
            e.g. it was left as ``"infer"`` and could not be resolved.
        """
        from pyreadr import (
            write_rdata,
            write_rds,
        )

        # Work on a local frame: rebinding ``self.frame`` would make a
        # second call reset the index again and emit an extra column.
        frame = (
            self.frame.reset_index()
            if self.index
            else self.frame.reset_index(drop=True)
        )

        with TemporaryDirectory() as tmp_dir:
            r_temp = os.path.join(tmp_dir, "rdata.rda")

            # pyreadr writes uncompressed output here; compression is
            # requested separately when the file is handed to
            # ``disk_to_buffer``.
            if self.file_format in ["rda", "rdata"]:
                write_rdata(
                    path=r_temp,
                    df=frame,
                    df_name=self.rda_name,
                    compress=None,
                )
            elif self.file_format == "rds":
                write_rds(
                    path=r_temp,
                    df=frame,
                    compress=None,
                )
            else:
                # Without this branch nothing is written and the failure
                # only surfaces later as a missing temporary file.
                raise ValueError(
                    f"Unable to write R data with file_format "
                    f"'{self.file_format}'. Please specify a known R data "
                    "type (rdata, rda, rds)."
                )

            self.disk_to_buffer(r_temp)

        return None
b/pandas/tests/io/data/rdata/env_data_non_dfs.rda differ diff --git a/pandas/tests/io/data/rdata/env_data_objs.rda b/pandas/tests/io/data/rdata/env_data_objs.rda new file mode 100644 index 0000000000000..61731d7774e45 Binary files /dev/null and b/pandas/tests/io/data/rdata/env_data_objs.rda differ diff --git a/pandas/tests/io/data/rdata/ghg_df.rds b/pandas/tests/io/data/rdata/ghg_df.rds new file mode 100644 index 0000000000000..18c91b7acf9d7 Binary files /dev/null and b/pandas/tests/io/data/rdata/ghg_df.rds differ diff --git a/pandas/tests/io/data/rdata/ghg_t_tests.rds b/pandas/tests/io/data/rdata/ghg_t_tests.rds new file mode 100644 index 0000000000000..e58879d33c1c8 Binary files /dev/null and b/pandas/tests/io/data/rdata/ghg_t_tests.rds differ diff --git a/pandas/tests/io/data/rdata/plants_arry.rds b/pandas/tests/io/data/rdata/plants_arry.rds new file mode 100644 index 0000000000000..e1d7032acebeb Binary files /dev/null and b/pandas/tests/io/data/rdata/plants_arry.rds differ diff --git a/pandas/tests/io/data/rdata/plants_df.rds b/pandas/tests/io/data/rdata/plants_df.rds new file mode 100644 index 0000000000000..5b9f58f6483ba Binary files /dev/null and b/pandas/tests/io/data/rdata/plants_df.rds differ diff --git a/pandas/tests/io/data/rdata/ppm_df.csv b/pandas/tests/io/data/rdata/ppm_df.csv new file mode 100644 index 0000000000000..4a2663110dca3 --- /dev/null +++ b/pandas/tests/io/data/rdata/ppm_df.csv @@ -0,0 +1,757 @@ +"year","month","decimal_date","monthly_average","de_seasonalized","num_days","st_dev_of_days","unc_mon_mean" +1958,3,1958.2027,315.7,314.43,-1,-9.99,-0.99 +1958,4,1958.2877,317.45,315.16,-1,-9.99,-0.99 +1958,5,1958.3699,317.51,314.71,-1,-9.99,-0.99 +1958,6,1958.4548,317.24,315.14,-1,-9.99,-0.99 +1958,7,1958.537,315.86,315.18,-1,-9.99,-0.99 +1958,8,1958.6219,314.93,316.18,-1,-9.99,-0.99 +1958,9,1958.7068,313.2,316.08,-1,-9.99,-0.99 +1958,10,1958.789,312.43,315.41,-1,-9.99,-0.99 +1958,11,1958.874,313.33,315.2,-1,-9.99,-0.99 
+1958,12,1958.9562,314.67,315.43,-1,-9.99,-0.99 +1959,1,1959.0411,315.58,315.55,-1,-9.99,-0.99 +1959,2,1959.126,316.48,315.86,-1,-9.99,-0.99 +1959,3,1959.2027,316.65,315.38,-1,-9.99,-0.99 +1959,4,1959.2877,317.72,315.41,-1,-9.99,-0.99 +1959,5,1959.3699,318.29,315.49,-1,-9.99,-0.99 +1959,6,1959.4548,318.15,316.03,-1,-9.99,-0.99 +1959,7,1959.537,316.54,315.86,-1,-9.99,-0.99 +1959,8,1959.6219,314.8,316.06,-1,-9.99,-0.99 +1959,9,1959.7068,313.84,316.73,-1,-9.99,-0.99 +1959,10,1959.789,313.33,316.33,-1,-9.99,-0.99 +1959,11,1959.874,314.81,316.68,-1,-9.99,-0.99 +1959,12,1959.9562,315.58,316.35,-1,-9.99,-0.99 +1960,1,1960.041,316.43,316.4,-1,-9.99,-0.99 +1960,2,1960.1257,316.98,316.36,-1,-9.99,-0.99 +1960,3,1960.2049,317.58,316.28,-1,-9.99,-0.99 +1960,4,1960.2896,319.03,316.7,-1,-9.99,-0.99 +1960,5,1960.3716,320.04,317.22,-1,-9.99,-0.99 +1960,6,1960.4563,319.59,317.47,-1,-9.99,-0.99 +1960,7,1960.5383,318.18,317.52,-1,-9.99,-0.99 +1960,8,1960.623,315.9,317.19,-1,-9.99,-0.99 +1960,9,1960.7077,314.17,317.08,-1,-9.99,-0.99 +1960,10,1960.7896,313.83,316.83,-1,-9.99,-0.99 +1960,11,1960.8743,315,316.88,-1,-9.99,-0.99 +1960,12,1960.9563,316.19,316.96,-1,-9.99,-0.99 +1961,1,1961.0411,316.89,316.86,-1,-9.99,-0.99 +1961,2,1961.126,317.7,317.08,-1,-9.99,-0.99 +1961,3,1961.2027,318.54,317.26,-1,-9.99,-0.99 +1961,4,1961.2877,319.48,317.16,-1,-9.99,-0.99 +1961,5,1961.3699,320.58,317.76,-1,-9.99,-0.99 +1961,6,1961.4548,319.77,317.63,-1,-9.99,-0.99 +1961,7,1961.537,318.57,317.88,-1,-9.99,-0.99 +1961,8,1961.6219,316.79,318.06,-1,-9.99,-0.99 +1961,9,1961.7068,314.99,317.9,-1,-9.99,-0.99 +1961,10,1961.789,315.31,318.32,-1,-9.99,-0.99 +1961,11,1961.874,316.1,317.99,-1,-9.99,-0.99 +1961,12,1961.9562,317.01,317.79,-1,-9.99,-0.99 +1962,1,1962.0411,317.94,317.91,-1,-9.99,-0.99 +1962,2,1962.126,318.55,317.92,-1,-9.99,-0.99 +1962,3,1962.2027,319.68,318.39,-1,-9.99,-0.99 +1962,4,1962.2877,320.57,318.24,-1,-9.99,-0.99 +1962,5,1962.3699,321.02,318.18,-1,-9.99,-0.99 
+1962,6,1962.4548,320.62,318.47,-1,-9.99,-0.99 +1962,7,1962.537,319.61,318.92,-1,-9.99,-0.99 +1962,8,1962.6219,317.4,318.68,-1,-9.99,-0.99 +1962,9,1962.7068,316.25,319.17,-1,-9.99,-0.99 +1962,10,1962.789,315.42,318.45,-1,-9.99,-0.99 +1962,11,1962.874,316.69,318.58,-1,-9.99,-0.99 +1962,12,1962.9562,317.7,318.47,-1,-9.99,-0.99 +1963,1,1963.0411,318.74,318.7,-1,-9.99,-0.99 +1963,2,1963.126,319.07,318.44,-1,-9.99,-0.99 +1963,3,1963.2027,319.86,318.57,-1,-9.99,-0.99 +1963,4,1963.2877,321.38,319.05,-1,-9.99,-0.99 +1963,5,1963.3699,322.25,319.4,-1,-9.99,-0.99 +1963,6,1963.4548,321.48,319.32,-1,-9.99,-0.99 +1963,7,1963.537,319.74,319.05,-1,-9.99,-0.99 +1963,8,1963.6219,317.77,319.05,-1,-9.99,-0.99 +1963,9,1963.7068,316.21,319.14,-1,-9.99,-0.99 +1963,10,1963.789,315.99,319.02,-1,-9.99,-0.99 +1963,11,1963.874,317.07,318.97,-1,-9.99,-0.99 +1963,12,1963.9562,318.35,319.13,-1,-9.99,-0.99 +1964,1,1964.041,319.57,319.54,-1,-9.99,-0.99 +1964,2,1964.1257,320.01,319.37,-1,-9.99,-0.99 +1964,3,1964.2049,320.74,319.41,-1,-9.99,-0.99 +1964,4,1964.2896,321.84,319.45,-1,-9.99,-0.99 +1964,5,1964.3716,322.26,319.4,-1,-9.99,-0.99 +1964,6,1964.4563,321.89,319.75,-1,-9.99,-0.99 +1964,7,1964.5383,320.44,319.77,-1,-9.99,-0.99 +1964,8,1964.623,318.69,320,-1,-9.99,-0.99 +1964,9,1964.7077,316.7,319.66,-1,-9.99,-0.99 +1964,10,1964.7896,316.87,319.91,-1,-9.99,-0.99 +1964,11,1964.8743,317.68,319.58,-1,-9.99,-0.99 +1964,12,1964.9563,318.71,319.49,-1,-9.99,-0.99 +1965,1,1965.0411,319.44,319.4,-1,-9.99,-0.99 +1965,2,1965.126,320.44,319.81,-1,-9.99,-0.99 +1965,3,1965.2027,320.89,319.59,-1,-9.99,-0.99 +1965,4,1965.2877,322.14,319.78,-1,-9.99,-0.99 +1965,5,1965.3699,322.17,319.3,-1,-9.99,-0.99 +1965,6,1965.4548,321.87,319.7,-1,-9.99,-0.99 +1965,7,1965.537,321.21,320.51,-1,-9.99,-0.99 +1965,8,1965.6219,318.87,320.15,-1,-9.99,-0.99 +1965,9,1965.7068,317.81,320.77,-1,-9.99,-0.99 +1965,10,1965.789,317.3,320.36,-1,-9.99,-0.99 +1965,11,1965.874,318.87,320.78,-1,-9.99,-0.99 
+1965,12,1965.9562,319.42,320.2,-1,-9.99,-0.99 +1966,1,1966.0411,320.62,320.59,-1,-9.99,-0.99 +1966,2,1966.126,321.6,320.96,-1,-9.99,-0.99 +1966,3,1966.2027,322.39,321.08,-1,-9.99,-0.99 +1966,4,1966.2877,323.7,321.34,-1,-9.99,-0.99 +1966,5,1966.3699,324.08,321.2,-1,-9.99,-0.99 +1966,6,1966.4548,323.75,321.57,-1,-9.99,-0.99 +1966,7,1966.537,322.38,321.68,-1,-9.99,-0.99 +1966,8,1966.6219,320.36,321.65,-1,-9.99,-0.99 +1966,9,1966.7068,318.64,321.6,-1,-9.99,-0.99 +1966,10,1966.789,318.1,321.17,-1,-9.99,-0.99 +1966,11,1966.874,319.78,321.7,-1,-9.99,-0.99 +1966,12,1966.9562,321.03,321.81,-1,-9.99,-0.99 +1967,1,1967.0411,322.33,322.29,-1,-9.99,-0.99 +1967,2,1967.126,322.5,321.86,-1,-9.99,-0.99 +1967,3,1967.2027,323.04,321.73,-1,-9.99,-0.99 +1967,4,1967.2877,324.42,322.04,-1,-9.99,-0.99 +1967,5,1967.3699,325,322.12,-1,-9.99,-0.99 +1967,6,1967.4548,324.09,321.91,-1,-9.99,-0.99 +1967,7,1967.537,322.54,321.84,-1,-9.99,-0.99 +1967,8,1967.6219,320.92,322.21,-1,-9.99,-0.99 +1967,9,1967.7068,319.25,322.23,-1,-9.99,-0.99 +1967,10,1967.789,319.39,322.47,-1,-9.99,-0.99 +1967,11,1967.874,320.73,322.65,-1,-9.99,-0.99 +1967,12,1967.9562,321.96,322.75,-1,-9.99,-0.99 +1968,1,1968.041,322.57,322.54,-1,-9.99,-0.99 +1968,2,1968.1257,323.15,322.51,-1,-9.99,-0.99 +1968,3,1968.2049,323.89,322.55,-1,-9.99,-0.99 +1968,4,1968.2896,325.02,322.62,-1,-9.99,-0.99 +1968,5,1968.3716,325.57,322.68,-1,-9.99,-0.99 +1968,6,1968.4563,325.36,323.19,-1,-9.99,-0.99 +1968,7,1968.5383,324.14,323.46,-1,-9.99,-0.99 +1968,8,1968.623,322.11,323.43,-1,-9.99,-0.99 +1968,9,1968.7077,320.33,323.32,-1,-9.99,-0.99 +1968,10,1968.7896,320.25,323.33,-1,-9.99,-0.99 +1968,11,1968.8743,321.32,323.25,-1,-9.99,-0.99 +1968,12,1968.9563,322.89,323.69,-1,-9.99,-0.99 +1969,1,1969.0411,324,323.97,-1,-9.99,-0.99 +1969,2,1969.126,324.42,323.77,-1,-9.99,-0.99 +1969,3,1969.2027,325.63,324.31,-1,-9.99,-0.99 +1969,4,1969.2877,326.66,324.27,-1,-9.99,-0.99 +1969,5,1969.3699,327.38,324.48,-1,-9.99,-0.99 
+1969,6,1969.4548,326.71,324.51,-1,-9.99,-0.99 +1969,7,1969.537,325.88,325.17,-1,-9.99,-0.99 +1969,8,1969.6219,323.66,324.97,-1,-9.99,-0.99 +1969,9,1969.7068,322.38,325.37,-1,-9.99,-0.99 +1969,10,1969.789,321.78,324.88,-1,-9.99,-0.99 +1969,11,1969.874,322.86,324.79,-1,-9.99,-0.99 +1969,12,1969.9562,324.12,324.91,-1,-9.99,-0.99 +1970,1,1970.0411,325.06,325.03,-1,-9.99,-0.99 +1970,2,1970.126,325.98,325.34,-1,-9.99,-0.99 +1970,3,1970.2027,326.93,325.61,-1,-9.99,-0.99 +1970,4,1970.2877,328.13,325.74,-1,-9.99,-0.99 +1970,5,1970.3699,328.08,325.16,-1,-9.99,-0.99 +1970,6,1970.4548,327.67,325.46,-1,-9.99,-0.99 +1970,7,1970.537,326.34,325.63,-1,-9.99,-0.99 +1970,8,1970.6219,324.69,325.99,-1,-9.99,-0.99 +1970,9,1970.7068,323.1,326.1,-1,-9.99,-0.99 +1970,10,1970.789,323.06,326.18,-1,-9.99,-0.99 +1970,11,1970.874,324.01,325.95,-1,-9.99,-0.99 +1970,12,1970.9562,325.13,325.93,-1,-9.99,-0.99 +1971,1,1971.0411,326.17,326.14,-1,-9.99,-0.99 +1971,2,1971.126,326.68,326.03,-1,-9.99,-0.99 +1971,3,1971.2027,327.17,325.85,-1,-9.99,-0.99 +1971,4,1971.2877,327.79,325.38,-1,-9.99,-0.99 +1971,5,1971.3699,328.93,326,-1,-9.99,-0.99 +1971,6,1971.4548,328.57,326.36,-1,-9.99,-0.99 +1971,7,1971.537,327.36,326.65,-1,-9.99,-0.99 +1971,8,1971.6219,325.43,326.74,-1,-9.99,-0.99 +1971,9,1971.7068,323.36,326.37,-1,-9.99,-0.99 +1971,10,1971.789,323.56,326.69,-1,-9.99,-0.99 +1971,11,1971.874,324.8,326.75,-1,-9.99,-0.99 +1971,12,1971.9562,326.01,326.82,-1,-9.99,-0.99 +1972,1,1972.041,326.77,326.73,-1,-9.99,-0.99 +1972,2,1972.1257,327.63,326.98,-1,-9.99,-0.99 +1972,3,1972.2049,327.75,326.39,-1,-9.99,-0.99 +1972,4,1972.2896,329.72,327.29,-1,-9.99,-0.99 +1972,5,1972.3716,330.07,327.14,-1,-9.99,-0.99 +1972,6,1972.4563,329.09,326.88,-1,-9.99,-0.99 +1972,7,1972.5383,328.04,327.36,-1,-9.99,-0.99 +1972,8,1972.623,326.32,327.67,-1,-9.99,-0.99 +1972,9,1972.7077,324.84,327.87,-1,-9.99,-0.99 +1972,10,1972.7896,325.2,328.33,-1,-9.99,-0.99 +1972,11,1972.8743,326.5,328.45,-1,-9.99,-0.99 
+1972,12,1972.9563,327.55,328.36,-1,-9.99,-0.99 +1973,1,1973.0411,328.55,328.51,-1,-9.99,-0.99 +1973,2,1973.126,329.56,328.91,-1,-9.99,-0.99 +1973,3,1973.2027,330.3,328.96,-1,-9.99,-0.99 +1973,4,1973.2877,331.5,329.08,-1,-9.99,-0.99 +1973,5,1973.3699,332.48,329.54,-1,-9.99,-0.99 +1973,6,1973.4548,332.07,329.84,-1,-9.99,-0.99 +1973,7,1973.537,330.87,330.15,-1,-9.99,-0.99 +1973,8,1973.6219,329.31,330.63,-1,-9.99,-0.99 +1973,9,1973.7068,327.51,330.55,-1,-9.99,-0.99 +1973,10,1973.789,327.18,330.32,-1,-9.99,-0.99 +1973,11,1973.874,328.16,330.13,-1,-9.99,-0.99 +1973,12,1973.9562,328.64,329.45,-1,-9.99,-0.99 +1974,1,1974.0411,329.35,329.32,-1,-9.99,-0.99 +1974,2,1974.126,330.71,330.05,-1,-9.99,-0.99 +1974,3,1974.2027,331.48,330.14,-1,-9.99,-0.99 +1974,4,1974.2877,332.65,330.22,-1,-9.99,-0.99 +1974,5,1974.375,333.19,330.22,13,0.31,0.16 +1974,6,1974.4583,332.2,329.79,25,0.37,0.14 +1974,7,1974.5417,331.07,330.21,24,0.24,0.09 +1974,8,1974.625,329.15,330.54,26,0.31,0.12 +1974,9,1974.7083,327.33,330.44,22,0.47,0.19 +1974,10,1974.7917,327.28,330.52,24,0.22,0.09 +1974,11,1974.875,328.31,330.5,26,0.43,0.16 +1974,12,1974.9583,329.58,330.56,29,0.29,0.1 +1975,1,1975.0417,330.73,330.84,29,0.43,0.15 +1975,2,1975.125,331.46,330.85,26,0.46,0.17 +1975,3,1975.2083,331.94,330.37,17,0.33,0.15 +1975,4,1975.2917,333.11,330.53,23,0.59,0.24 +1975,5,1975.375,333.95,330.97,28,0.35,0.13 +1975,6,1975.4583,333.42,331.01,27,0.48,0.18 +1975,7,1975.5417,331.97,331.12,24,0.45,0.18 +1975,8,1975.625,329.95,331.33,24,0.47,0.18 +1975,9,1975.7083,328.5,331.6,22,0.53,0.22 +1975,10,1975.7917,328.36,331.61,11,0.21,0.12 +1975,11,1975.875,329.38,331.57,18,0.31,0.14 +1975,12,1975.9583,330.62,331.6,-1,-9.99,-0.99 +1976,1,1976.0417,331.56,331.67,19,0.23,0.1 +1976,2,1976.125,332.74,332.13,22,0.49,0.2 +1976,3,1976.2083,333.36,331.79,18,0.52,0.23 +1976,4,1976.2917,334.74,332.16,18,0.77,0.35 +1976,5,1976.375,334.72,331.75,21,0.56,0.23 +1976,6,1976.4583,333.98,331.56,15,0.21,0.1 
+1976,7,1976.5417,333.08,332.22,15,0.24,0.12 +1976,8,1976.625,330.68,332.07,23,0.51,0.2 +1976,9,1976.7083,328.96,332.07,13,0.69,0.37 +1976,10,1976.7917,328.72,331.97,19,0.57,0.25 +1976,11,1976.875,330.16,332.35,25,0.36,0.14 +1976,12,1976.9583,331.62,332.6,20,0.38,0.16 +1977,1,1977.0417,332.68,332.77,23,0.4,0.16 +1977,2,1977.125,333.17,332.57,20,0.34,0.15 +1977,3,1977.2083,334.96,333.4,23,0.51,0.21 +1977,4,1977.2917,336.14,333.54,20,0.5,0.21 +1977,5,1977.375,336.93,333.99,20,0.31,0.13 +1977,6,1977.4583,336.17,333.79,22,0.4,0.16 +1977,7,1977.5417,334.88,334,20,0.23,0.1 +1977,8,1977.625,332.56,333.9,18,0.46,0.21 +1977,9,1977.7083,331.29,334.36,19,0.46,0.2 +1977,10,1977.7917,331.28,334.5,23,0.29,0.12 +1977,11,1977.875,332.46,334.69,21,0.43,0.18 +1977,12,1977.9583,333.6,334.59,25,0.36,0.14 +1978,1,1978.0417,334.94,335.01,22,0.52,0.21 +1978,2,1978.125,335.26,334.59,25,0.5,0.19 +1978,3,1978.2083,336.66,335,28,0.59,0.21 +1978,4,1978.2917,337.69,335.07,18,0.44,0.2 +1978,5,1978.375,338.02,335.07,26,0.46,0.17 +1978,6,1978.4583,338.01,335.59,17,0.31,0.15 +1978,7,1978.5417,336.5,335.65,20,0.32,0.14 +1978,8,1978.625,334.42,335.87,19,0.32,0.14 +1978,9,1978.7083,332.36,335.51,17,0.75,0.35 +1978,10,1978.7917,332.45,335.72,21,0.34,0.14 +1978,11,1978.875,333.76,335.99,24,0.25,0.1 +1978,12,1978.9583,334.91,335.89,26,0.33,0.12 +1979,1,1979.0417,336.14,336.22,27,0.55,0.2 +1979,2,1979.125,336.69,336,25,0.3,0.11 +1979,3,1979.2083,338.27,336.56,21,0.63,0.26 +1979,4,1979.2917,338.82,336.11,24,0.67,0.26 +1979,5,1979.375,339.24,336.24,20,0.5,0.22 +1979,6,1979.4583,339.26,336.83,19,0.35,0.15 +1979,7,1979.5417,337.54,336.69,26,0.59,0.22 +1979,8,1979.625,335.72,337.2,24,0.6,0.23 +1979,9,1979.7083,333.98,337.19,19,0.65,0.29 +1979,10,1979.7917,334.24,337.57,25,0.42,0.16 +1979,11,1979.875,335.32,337.59,27,0.3,0.11 +1979,12,1979.9583,336.81,337.83,22,0.23,0.09 +1980,1,1980.0417,337.9,338.13,29,0.57,0.2 +1980,2,1980.125,338.34,337.85,26,0.49,0.18 +1980,3,1980.2083,340.07,338.51,23,0.54,0.22 
+1980,4,1980.2917,340.93,338.31,24,0.29,0.11 +1980,5,1980.375,341.45,338.4,24,0.54,0.21 +1980,6,1980.4583,341.36,338.85,20,0.39,0.17 +1980,7,1980.5417,339.45,338.56,26,0.6,0.22 +1980,8,1980.625,337.67,339.07,16,1.05,0.5 +1980,9,1980.7083,336.25,339.37,15,0.69,0.34 +1980,10,1980.7917,336.14,339.4,26,0.26,0.1 +1980,11,1980.875,337.3,339.46,27,0.26,0.1 +1980,12,1980.9583,338.29,339.26,24,0.25,0.1 +1981,1,1981.0417,339.29,339.42,28,0.39,0.14 +1981,2,1981.125,340.55,339.97,25,0.65,0.25 +1981,3,1981.2083,341.63,340.09,25,0.48,0.19 +1981,4,1981.2917,342.6,340,26,0.46,0.17 +1981,5,1981.375,343.04,339.98,30,0.19,0.07 +1981,6,1981.4583,342.54,340.05,25,0.29,0.11 +1981,7,1981.5417,340.82,339.92,24,0.46,0.18 +1981,8,1981.625,338.48,339.87,25,0.48,0.18 +1981,9,1981.7083,336.95,340.16,27,0.55,0.2 +1981,10,1981.7917,337.05,340.39,25,0.39,0.15 +1981,11,1981.875,338.58,340.74,26,0.31,0.12 +1981,12,1981.9583,339.91,340.85,20,0.28,0.12 +1982,1,1982.0417,340.93,341.09,28,0.3,0.11 +1982,2,1982.125,341.76,341.15,24,0.49,0.19 +1982,3,1982.2083,342.78,341.18,17,0.41,0.19 +1982,4,1982.2917,343.96,341.34,7,0.42,0.31 +1982,5,1982.375,344.77,341.68,27,0.37,0.14 +1982,6,1982.4583,343.88,341.42,27,0.37,0.14 +1982,7,1982.5417,342.42,341.61,28,0.35,0.13 +1982,8,1982.625,340.24,341.64,25,0.61,0.23 +1982,9,1982.7083,338.38,341.56,21,0.59,0.25 +1982,10,1982.7917,338.41,341.77,26,0.5,0.19 +1982,11,1982.875,339.44,341.58,24,0.39,0.15 +1982,12,1982.9583,340.78,341.7,26,0.3,0.11 +1983,1,1983.0417,341.57,341.75,28,0.47,0.17 +1983,2,1983.125,342.79,342.24,24,0.37,0.15 +1983,3,1983.2083,343.37,341.86,27,0.88,0.32 +1983,4,1983.2917,345.4,342.78,23,0.29,0.12 +1983,5,1983.375,346.14,342.97,28,0.51,0.19 +1983,6,1983.4583,345.76,343.29,20,0.3,0.13 +1983,7,1983.5417,344.32,343.56,22,0.57,0.23 +1983,8,1983.625,342.51,343.89,16,0.73,0.35 +1983,9,1983.7083,340.46,343.59,15,0.5,0.25 +1983,10,1983.7917,340.53,343.86,20,0.31,0.13 +1983,11,1983.875,341.79,343.92,27,0.33,0.12 +1983,12,1983.9583,343.2,344.12,21,0.25,0.1 
+1984,1,1984.0417,344.21,344.32,23,0.4,0.16 +1984,2,1984.125,344.92,344.38,23,0.32,0.13 +1984,3,1984.2083,345.68,344.26,19,0.3,0.13 +1984,4,1984.2917,347.14,344.54,2,-9.99,-0.99 +1984,5,1984.375,347.78,344.59,20,0.42,0.18 +1984,6,1984.4583,347.16,344.72,20,0.31,0.13 +1984,7,1984.5417,345.79,345.02,18,0.33,0.15 +1984,8,1984.625,343.74,345.11,12,0.45,0.25 +1984,9,1984.7083,341.59,344.75,14,0.72,0.37 +1984,10,1984.7917,341.86,345.19,12,0.36,0.2 +1984,11,1984.875,343.31,345.4,18,0.41,0.19 +1984,12,1984.9583,345,345.88,14,0.53,0.27 +1985,1,1985.0417,345.48,345.59,25,0.38,0.14 +1985,2,1985.125,346.41,345.91,15,0.37,0.18 +1985,3,1985.2083,347.91,346.57,17,0.34,0.16 +1985,4,1985.2917,348.66,346.1,21,0.61,0.25 +1985,5,1985.375,349.28,346.13,20,0.51,0.22 +1985,6,1985.4583,348.65,346.22,21,0.34,0.14 +1985,7,1985.5417,346.91,346.08,17,0.36,0.17 +1985,8,1985.625,345.26,346.57,16,0.57,0.27 +1985,9,1985.7083,343.47,346.58,24,0.57,0.22 +1985,10,1985.7917,343.35,346.6,20,0.29,0.13 +1985,11,1985.875,344.73,346.82,21,0.4,0.17 +1985,12,1985.9583,346.12,347.04,26,0.62,0.23 +1986,1,1986.0417,346.78,346.82,25,0.31,0.12 +1986,2,1986.125,347.48,346.97,25,0.45,0.17 +1986,3,1986.2083,348.25,346.94,16,0.7,0.34 +1986,4,1986.2917,349.86,347.32,19,0.38,0.17 +1986,5,1986.375,350.52,347.42,18,0.31,0.14 +1986,6,1986.4583,349.98,347.6,17,0.25,0.11 +1986,7,1986.5417,348.25,347.43,20,0.47,0.2 +1986,8,1986.625,346.17,347.51,18,0.48,0.21 +1986,9,1986.7083,345.48,348.61,17,0.63,0.29 +1986,10,1986.7917,344.82,348.04,25,0.32,0.12 +1986,11,1986.875,346.22,348.28,21,0.3,0.13 +1986,12,1986.9583,347.48,348.36,24,0.35,0.14 +1987,1,1987.0417,348.73,348.66,25,0.46,0.17 +1987,2,1987.125,348.92,348.23,25,0.58,0.22 +1987,3,1987.2083,349.81,348.39,21,0.35,0.15 +1987,4,1987.2917,351.4,348.86,26,0.68,0.25 +1987,5,1987.375,352.15,349.09,28,0.37,0.13 +1987,6,1987.4583,351.58,349.28,22,0.21,0.09 +1987,7,1987.5417,350.21,349.51,17,0.73,0.34 +1987,8,1987.625,348.2,349.65,15,0.85,0.42 
+1987,9,1987.7083,346.66,349.85,23,0.61,0.24 +1987,10,1987.7917,346.72,349.96,22,0.41,0.17 +1987,11,1987.875,348.08,350.14,23,0.33,0.13 +1987,12,1987.9583,349.28,350.14,27,0.2,0.08 +1988,1,1988.0417,350.51,350.49,24,0.21,0.08 +1988,2,1988.125,351.7,350.99,23,0.57,0.23 +1988,3,1988.2083,352.5,350.99,25,0.78,0.3 +1988,4,1988.2917,353.67,351.03,27,0.48,0.18 +1988,5,1988.375,354.35,351.22,28,0.37,0.13 +1988,6,1988.4583,353.88,351.55,26,0.3,0.11 +1988,7,1988.5417,352.8,352.15,27,0.49,0.18 +1988,8,1988.625,350.49,352.01,26,0.62,0.23 +1988,9,1988.7083,348.97,352.18,26,0.47,0.18 +1988,10,1988.7917,349.37,352.62,26,0.31,0.12 +1988,11,1988.875,350.42,352.53,25,0.2,0.08 +1988,12,1988.9583,351.62,352.52,28,0.36,0.13 +1989,1,1989.0417,353.07,352.99,28,0.45,0.16 +1989,2,1989.125,353.43,352.69,25,0.38,0.15 +1989,3,1989.2083,354.08,352.6,29,0.53,0.19 +1989,4,1989.2917,355.72,353.07,28,0.47,0.17 +1989,5,1989.375,355.95,352.78,27,0.49,0.18 +1989,6,1989.4583,355.44,353.06,26,0.42,0.16 +1989,7,1989.5417,354.05,353.38,26,0.41,0.15 +1989,8,1989.625,351.84,353.43,25,0.48,0.18 +1989,9,1989.7083,350.09,353.37,24,0.69,0.27 +1989,10,1989.7917,350.33,353.57,25,0.34,0.13 +1989,11,1989.875,351.55,353.68,27,0.36,0.13 +1989,12,1989.9583,352.91,353.84,27,0.48,0.18 +1990,1,1990.0417,353.86,353.78,25,0.34,0.13 +1990,2,1990.125,355.1,354.37,28,0.66,0.24 +1990,3,1990.2083,355.75,354.27,27,0.57,0.21 +1990,4,1990.2917,356.38,353.76,28,0.55,0.2 +1990,5,1990.375,357.38,354.23,28,0.3,0.11 +1990,6,1990.4583,356.39,354.02,29,0.4,0.14 +1990,7,1990.5417,354.89,354.24,30,0.89,0.31 +1990,8,1990.625,353.06,354.68,22,0.62,0.25 +1990,9,1990.7083,351.38,354.69,27,0.72,0.26 +1990,10,1990.7917,351.69,354.94,28,0.3,0.11 +1990,11,1990.875,353.14,355.18,24,0.2,0.08 +1990,12,1990.9583,354.41,355.26,28,0.51,0.19 +1991,1,1991.0417,354.93,354.9,28,0.51,0.18 +1991,2,1991.125,355.82,355.11,26,0.54,0.2 +1991,3,1991.2083,357.33,355.79,30,0.73,0.25 +1991,4,1991.2917,358.77,356.13,30,0.66,0.23 
+1991,5,1991.375,359.23,356.1,29,0.52,0.19 +1991,6,1991.4583,358.23,355.88,29,0.3,0.11 +1991,7,1991.5417,356.3,355.69,24,0.46,0.18 +1991,8,1991.625,353.97,355.6,23,0.39,0.15 +1991,9,1991.7083,352.34,355.66,27,0.37,0.14 +1991,10,1991.7917,352.43,355.69,27,0.25,0.09 +1991,11,1991.875,353.89,355.87,28,0.25,0.09 +1991,12,1991.9583,355.21,356.02,30,0.34,0.12 +1992,1,1992.0417,356.34,356.29,31,0.6,0.21 +1992,2,1992.125,357.21,356.47,27,0.56,0.21 +1992,3,1992.2083,357.97,356.38,24,0.72,0.28 +1992,4,1992.2917,359.22,356.51,27,0.53,0.2 +1992,5,1992.375,359.71,356.52,26,0.74,0.28 +1992,6,1992.4583,359.43,357.07,30,0.49,0.17 +1992,7,1992.5417,357.15,356.58,25,0.63,0.24 +1992,8,1992.625,354.99,356.67,24,0.62,0.24 +1992,9,1992.7083,353.01,356.36,25,0.98,0.38 +1992,10,1992.7917,353.41,356.72,29,0.56,0.2 +1992,11,1992.875,354.42,356.48,29,0.34,0.12 +1992,12,1992.9583,355.68,356.5,31,0.32,0.11 +1993,1,1993.0417,357.1,357.06,28,0.58,0.21 +1993,2,1993.125,357.42,356.54,28,0.49,0.18 +1993,3,1993.2083,358.59,356.88,30,0.72,0.25 +1993,4,1993.2917,359.39,356.71,25,0.53,0.2 +1993,5,1993.375,360.3,357.14,30,0.45,0.16 +1993,6,1993.4583,359.64,357.24,28,0.35,0.13 +1993,7,1993.5417,357.46,356.87,25,0.78,0.3 +1993,8,1993.625,355.76,357.44,27,0.62,0.23 +1993,9,1993.7083,354.14,357.51,23,0.73,0.29 +1993,10,1993.7917,354.23,357.61,28,0.29,0.11 +1993,11,1993.875,355.53,357.65,29,0.26,0.09 +1993,12,1993.9583,357.03,357.92,29,0.28,0.1 +1994,1,1994.0417,358.36,358.25,27,0.33,0.12 +1994,2,1994.125,359.04,358.21,25,0.5,0.19 +1994,3,1994.2083,360.11,358.41,29,0.82,0.29 +1994,4,1994.2917,361.36,358.59,28,0.5,0.18 +1994,5,1994.375,361.78,358.59,30,0.45,0.16 +1994,6,1994.4583,360.94,358.57,27,0.3,0.11 +1994,7,1994.5417,359.51,358.91,31,0.41,0.14 +1994,8,1994.625,357.59,359.29,24,0.43,0.17 +1994,9,1994.7083,355.86,359.3,24,0.58,0.23 +1994,10,1994.7917,356.21,359.63,28,0.28,0.1 +1994,11,1994.875,357.65,359.8,28,0.51,0.18 +1994,12,1994.9583,359.1,359.96,28,0.46,0.17 
+1995,1,1995.0417,360.04,359.91,30,0.47,0.16 +1995,2,1995.125,361,360.18,28,0.52,0.19 +1995,3,1995.2083,361.98,360.37,29,0.78,0.28 +1995,4,1995.2917,363.44,360.76,29,0.65,0.23 +1995,5,1995.375,363.83,360.73,29,0.66,0.24 +1995,6,1995.4583,363.33,360.98,27,0.37,0.14 +1995,7,1995.5417,361.78,361.1,28,0.36,0.13 +1995,8,1995.625,359.33,360.93,24,0.7,0.28 +1995,9,1995.7083,358.32,361.71,24,0.68,0.26 +1995,10,1995.7917,358.14,361.52,29,0.26,0.09 +1995,11,1995.875,359.61,361.75,26,0.24,0.09 +1995,12,1995.9583,360.82,361.67,30,0.36,0.12 +1996,1,1996.0417,362.2,361.98,29,0.38,0.13 +1996,2,1996.125,363.36,362.47,28,0.55,0.2 +1996,3,1996.2083,364.28,362.64,28,0.67,0.24 +1996,4,1996.2917,364.69,361.99,29,0.59,0.21 +1996,5,1996.375,365.25,362.23,30,0.57,0.2 +1996,6,1996.4583,365.06,362.82,30,0.38,0.13 +1996,7,1996.5417,363.69,362.98,31,0.32,0.11 +1996,8,1996.625,361.55,363.13,27,0.49,0.18 +1996,9,1996.7083,359.69,363.14,25,0.75,0.29 +1996,10,1996.7917,359.72,363.12,29,0.32,0.11 +1996,11,1996.875,361.04,363.18,29,0.29,0.1 +1996,12,1996.9583,362.39,363.23,29,0.36,0.13 +1997,1,1997.0417,363.24,363.03,31,0.4,0.14 +1997,2,1997.125,364.21,363.4,28,0.62,0.22 +1997,3,1997.2083,364.65,363.02,31,0.4,0.14 +1997,4,1997.2917,366.48,363.82,21,0.46,0.19 +1997,5,1997.375,366.77,363.87,29,0.53,0.19 +1997,6,1997.4583,365.73,363.56,27,0.23,0.09 +1997,7,1997.5417,364.46,363.74,24,0.47,0.18 +1997,8,1997.625,362.4,363.98,25,0.57,0.22 +1997,9,1997.7083,360.44,363.83,26,0.63,0.24 +1997,10,1997.7917,360.98,364.28,27,0.32,0.12 +1997,11,1997.875,362.65,364.71,30,0.31,0.11 +1997,12,1997.9583,364.51,365.28,30,0.41,0.14 +1998,1,1998.0417,365.39,365.19,30,0.43,0.15 +1998,2,1998.125,366.1,365.29,28,0.62,0.23 +1998,3,1998.2083,367.36,365.73,31,0.82,0.28 +1998,4,1998.2917,368.79,366.17,29,0.63,0.22 +1998,5,1998.375,369.56,366.68,30,0.77,0.27 +1998,6,1998.4583,369.13,366.95,28,0.24,0.09 +1998,7,1998.5417,367.98,367.29,23,0.65,0.26 +1998,8,1998.625,366.1,367.69,30,0.3,0.1 
+1998,9,1998.7083,364.16,367.51,28,0.4,0.14 +1998,10,1998.7917,364.54,367.82,30,0.26,0.09 +1998,11,1998.875,365.67,367.7,23,0.25,0.1 +1998,12,1998.9583,367.3,368.05,26,0.36,0.14 +1999,1,1999.0417,368.35,368.13,27,0.47,0.17 +1999,2,1999.125,369.28,368.46,21,0.47,0.2 +1999,3,1999.2083,369.84,368.24,25,0.81,0.31 +1999,4,1999.2917,371.15,368.62,29,0.67,0.24 +1999,5,1999.375,371.12,368.31,26,0.59,0.22 +1999,6,1999.4583,370.46,368.3,26,0.44,0.16 +1999,7,1999.5417,369.61,368.93,27,0.63,0.23 +1999,8,1999.625,367.06,368.63,25,0.38,0.14 +1999,9,1999.7083,364.95,368.28,28,0.74,0.27 +1999,10,1999.7917,365.52,368.8,31,0.28,0.1 +1999,11,1999.875,366.88,368.86,28,0.25,0.09 +1999,12,1999.9583,368.26,368.93,26,0.29,0.11 +2000,1,2000.0417,369.45,369.24,26,0.47,0.18 +2000,2,2000.125,369.71,368.99,19,0.48,0.21 +2000,3,2000.2083,370.75,369.24,30,0.47,0.16 +2000,4,2000.2917,371.98,369.44,27,0.58,0.21 +2000,5,2000.375,371.75,368.87,28,0.53,0.19 +2000,6,2000.4583,371.87,369.66,28,0.24,0.09 +2000,7,2000.5417,370.02,369.36,25,0.31,0.12 +2000,8,2000.625,368.27,369.87,27,0.42,0.15 +2000,9,2000.7083,367.15,370.46,25,0.36,0.14 +2000,10,2000.7917,367.18,370.42,30,0.27,0.09 +2000,11,2000.875,368.53,370.48,25,0.3,0.12 +2000,12,2000.9583,369.83,370.46,30,0.38,0.13 +2001,1,2001.0417,370.76,370.6,30,0.56,0.2 +2001,2,2001.125,371.69,370.95,26,0.61,0.23 +2001,3,2001.2083,372.63,371.06,26,0.46,0.17 +2001,4,2001.2917,373.55,370.99,29,0.56,0.2 +2001,5,2001.375,374.03,371.11,24,0.41,0.16 +2001,6,2001.4583,373.4,371.17,26,0.37,0.14 +2001,7,2001.5417,371.68,371.08,25,0.62,0.24 +2001,8,2001.625,369.78,371.39,27,0.6,0.22 +2001,9,2001.7083,368.34,371.61,28,0.49,0.18 +2001,10,2001.7917,368.61,371.85,31,0.33,0.11 +2001,11,2001.875,369.94,371.92,24,0.24,0.09 +2001,12,2001.9583,371.42,372.09,29,0.4,0.14 +2002,1,2002.0417,372.7,372.48,28,0.52,0.19 +2002,2,2002.125,373.37,372.49,28,0.66,0.24 +2002,3,2002.2083,374.3,372.61,24,0.62,0.24 +2002,4,2002.2917,375.19,372.54,29,0.55,0.19 
+2002,5,2002.375,375.93,372.98,29,0.57,0.2 +2002,6,2002.4583,375.69,373.46,28,0.46,0.17 +2002,7,2002.5417,374.16,373.58,25,0.47,0.18 +2002,8,2002.625,372.03,373.7,28,0.65,0.24 +2002,9,2002.7083,370.93,374.29,23,0.74,0.3 +2002,10,2002.7917,370.73,374.06,31,0.62,0.21 +2002,11,2002.875,372.43,374.52,29,0.43,0.15 +2002,12,2002.9583,373.98,374.72,31,0.46,0.16 +2003,1,2003.0417,375.07,374.82,30,0.51,0.18 +2003,2,2003.125,375.82,374.95,27,0.58,0.21 +2003,3,2003.2083,376.64,374.99,28,0.63,0.23 +2003,4,2003.2917,377.92,375.24,27,0.37,0.14 +2003,5,2003.375,378.78,375.73,30,0.78,0.27 +2003,6,2003.4583,378.46,376.21,25,0.39,0.15 +2003,7,2003.5417,376.88,376.37,29,0.7,0.25 +2003,8,2003.625,374.57,376.27,23,0.57,0.23 +2003,9,2003.7083,373.34,376.65,25,0.37,0.14 +2003,10,2003.7917,373.31,376.65,30,0.33,0.12 +2003,11,2003.875,374.84,376.99,26,0.45,0.17 +2003,12,2003.9583,376.17,376.93,27,0.4,0.15 +2004,1,2004.0417,377.17,376.96,30,0.45,0.16 +2004,2,2004.125,378.05,377.19,29,0.74,0.26 +2004,3,2004.2083,379.06,377.4,27,0.84,0.31 +2004,4,2004.2917,380.54,377.8,26,0.52,0.19 +2004,5,2004.375,380.8,377.66,28,0.61,0.22 +2004,6,2004.4583,379.87,377.57,21,0.47,0.19 +2004,7,2004.5417,377.65,377.12,25,0.5,0.19 +2004,8,2004.625,376.17,377.9,16,0.45,0.21 +2004,9,2004.7083,374.43,377.8,15,0.56,0.28 +2004,10,2004.7917,374.63,378,29,0.19,0.07 +2004,11,2004.875,376.33,378.49,29,0.62,0.22 +2004,12,2004.9583,377.68,378.48,30,0.29,0.1 +2005,1,2005.0417,378.63,378.37,31,0.32,0.11 +2005,2,2005.125,379.91,379.1,24,0.6,0.24 +2005,3,2005.2083,380.95,379.45,26,1.16,0.44 +2005,4,2005.2917,382.48,379.84,26,0.53,0.2 +2005,5,2005.375,382.64,379.49,31,0.61,0.21 +2005,6,2005.4583,382.4,380.07,28,0.21,0.08 +2005,7,2005.5417,380.93,380.38,29,0.38,0.13 +2005,8,2005.625,378.93,380.61,26,0.53,0.2 +2005,9,2005.7083,376.89,380.2,27,0.51,0.19 +2005,10,2005.7917,377.19,380.5,14,0.15,0.08 +2005,11,2005.875,378.54,380.69,23,0.45,0.18 +2005,12,2005.9583,380.31,381.09,26,0.39,0.15 +2006,1,2006.0417,381.58,381.33,24,0.31,0.12 
+2006,2,2006.125,382.4,381.58,25,0.51,0.2 +2006,3,2006.2083,382.86,381.32,29,0.55,0.2 +2006,4,2006.2917,384.8,382.11,25,0.49,0.19 +2006,5,2006.375,385.22,382.06,24,0.45,0.17 +2006,6,2006.4583,384.24,381.93,28,0.43,0.16 +2006,7,2006.5417,382.65,382.1,24,0.32,0.12 +2006,8,2006.625,380.6,382.27,27,0.47,0.17 +2006,9,2006.7083,379.04,382.35,25,0.42,0.16 +2006,10,2006.7917,379.33,382.66,23,0.4,0.16 +2006,11,2006.875,380.35,382.52,29,0.39,0.14 +2006,12,2006.9583,382.02,382.84,27,0.38,0.14 +2007,1,2007.0417,383.1,382.88,24,0.76,0.3 +2007,2,2007.125,384.12,383.22,21,0.81,0.34 +2007,3,2007.2083,384.81,383.17,27,0.63,0.23 +2007,4,2007.2917,386.73,383.95,25,0.76,0.29 +2007,5,2007.375,386.78,383.56,29,0.64,0.23 +2007,6,2007.4583,386.33,384.06,26,0.42,0.16 +2007,7,2007.5417,384.73,384.25,27,0.44,0.16 +2007,8,2007.625,382.24,383.95,22,0.64,0.26 +2007,9,2007.7083,381.2,384.56,21,0.45,0.19 +2007,10,2007.7917,381.37,384.72,29,0.19,0.07 +2007,11,2007.875,382.7,384.9,30,0.31,0.11 +2007,12,2007.9583,384.19,385.07,22,0.34,0.14 +2008,1,2008.0417,385.78,385.54,31,0.56,0.19 +2008,2,2008.125,386.06,385.2,26,0.58,0.22 +2008,3,2008.2083,386.28,384.72,30,0.6,0.21 +2008,4,2008.2917,387.34,384.71,22,1.19,0.49 +2008,5,2008.375,388.78,385.69,25,0.57,0.22 +2008,6,2008.4583,387.99,385.68,23,0.49,0.2 +2008,7,2008.5417,386.6,386.04,10,0.96,0.58 +2008,8,2008.625,384.32,385.98,25,0.66,0.25 +2008,9,2008.7083,383.41,386.68,27,0.34,0.12 +2008,10,2008.7917,383.21,386.49,23,0.27,0.11 +2008,11,2008.875,384.41,386.59,28,0.29,0.11 +2008,12,2008.9583,385.79,386.64,29,0.27,0.1 +2009,1,2009.0417,387.17,386.86,30,0.38,0.13 +2009,2,2009.125,387.7,386.81,26,0.49,0.18 +2009,3,2009.2083,389.04,387.54,28,0.68,0.25 +2009,4,2009.2917,389.76,387.15,29,0.85,0.3 +2009,5,2009.375,390.36,387.24,30,0.51,0.18 +2009,6,2009.4583,389.7,387.46,29,0.6,0.21 +2009,7,2009.5417,388.25,387.77,22,0.31,0.13 +2009,8,2009.625,386.29,387.99,28,0.62,0.22 +2009,9,2009.7083,384.95,388.22,28,0.56,0.2 +2009,10,2009.7917,384.64,387.88,30,0.31,0.11 
+2009,11,2009.875,386.23,388.36,30,0.29,0.1 +2009,12,2009.9583,387.63,388.43,20,0.47,0.2 +2010,1,2010.0417,388.91,388.62,30,0.92,0.32 +2010,2,2010.125,390.41,389.47,20,1.31,0.56 +2010,3,2010.2083,391.37,389.85,25,1.05,0.4 +2010,4,2010.2917,392.67,390.12,26,0.65,0.24 +2010,5,2010.375,393.21,390.09,29,0.65,0.23 +2010,6,2010.4583,392.38,390.1,28,0.42,0.15 +2010,7,2010.5417,390.41,389.94,29,0.47,0.17 +2010,8,2010.625,388.54,390.21,26,0.41,0.16 +2010,9,2010.7083,387.03,390.32,29,0.55,0.19 +2010,10,2010.7917,387.43,390.72,31,0.27,0.09 +2010,11,2010.875,388.87,390.99,29,0.42,0.15 +2010,12,2010.9583,389.99,390.8,29,0.47,0.17 +2011,1,2011.0417,391.5,391.2,29,0.88,0.31 +2011,2,2011.125,392.05,391.12,28,0.47,0.17 +2011,3,2011.2083,392.8,391.28,29,0.97,0.35 +2011,4,2011.2917,393.44,390.84,28,0.73,0.26 +2011,5,2011.375,394.41,391.24,29,0.93,0.33 +2011,6,2011.4583,393.95,391.65,28,0.45,0.16 +2011,7,2011.5417,392.72,392.24,26,0.71,0.26 +2011,8,2011.625,390.33,392.03,27,0.42,0.15 +2011,9,2011.7083,389.28,392.6,26,0.31,0.12 +2011,10,2011.7917,389.19,392.52,30,0.17,0.06 +2011,11,2011.875,390.48,392.63,28,0.26,0.1 +2011,12,2011.9583,392.06,392.86,26,0.37,0.14 +2012,1,2012.0417,393.31,393.08,30,0.77,0.27 +2012,2,2012.125,394.04,393.21,26,1.19,0.45 +2012,3,2012.2083,394.59,393,30,0.63,0.22 +2012,4,2012.2917,396.38,393.65,29,0.59,0.21 +2012,5,2012.375,396.93,393.73,30,0.5,0.17 +2012,6,2012.4583,395.91,393.64,28,0.59,0.21 +2012,7,2012.5417,394.56,394.12,26,0.3,0.11 +2012,8,2012.625,392.59,394.36,30,0.52,0.18 +2012,9,2012.7083,391.32,394.74,26,0.42,0.16 +2012,10,2012.7917,391.27,394.63,28,0.23,0.08 +2012,11,2012.875,393.2,395.24,29,0.53,0.19 +2012,12,2012.9583,394.57,395.27,29,0.44,0.16 +2013,1,2013.0417,395.78,395.63,28,0.6,0.22 +2013,2,2013.125,397.03,396.24,25,0.57,0.22 +2013,3,2013.2083,397.66,396.08,30,0.71,0.25 +2013,4,2013.2917,398.64,395.8,22,0.59,0.24 +2013,5,2013.375,400.02,396.65,28,0.37,0.13 +2013,6,2013.4583,398.81,396.48,26,0.43,0.16 
+2013,7,2013.5417,397.51,397.12,21,0.52,0.22 +2013,8,2013.625,395.39,397.26,27,0.45,0.16 +2013,9,2013.7083,393.72,397.23,26,0.35,0.13 +2013,10,2013.7917,393.9,397.24,28,0.16,0.06 +2013,11,2013.875,395.36,397.34,30,0.6,0.21 +2013,12,2013.9583,397.03,397.78,30,0.48,0.17 +2014,1,2014.0417,398.04,397.74,31,0.49,0.17 +2014,2,2014.125,398.27,397.46,27,0.51,0.19 +2014,3,2014.2083,399.91,398.38,22,0.84,0.34 +2014,4,2014.2917,401.51,398.64,26,0.5,0.19 +2014,5,2014.375,401.96,398.57,22,0.51,0.21 +2014,6,2014.4583,401.43,399.11,28,0.36,0.13 +2014,7,2014.5417,399.38,398.95,25,0.56,0.21 +2014,8,2014.625,397.32,399.2,21,0.22,0.09 +2014,9,2014.7083,395.64,399.2,21,0.56,0.24 +2014,10,2014.7917,396.29,399.69,24,0.75,0.29 +2014,11,2014.875,397.55,399.63,27,0.38,0.14 +2014,12,2014.9583,399.15,399.88,29,0.61,0.22 +2015,1,2015.0417,400.18,399.92,30,0.55,0.19 +2015,2,2015.125,400.55,399.78,28,0.63,0.23 +2015,3,2015.2083,401.74,400.23,24,1.02,0.4 +2015,4,2015.2917,403.34,400.47,26,0.86,0.32 +2015,5,2015.375,404.15,400.71,30,0.32,0.11 +2015,6,2015.4583,402.97,400.66,29,0.47,0.17 +2015,7,2015.5417,401.46,401.1,24,0.57,0.22 +2015,8,2015.625,399.11,401.03,28,0.74,0.27 +2015,9,2015.7083,397.82,401.43,25,0.32,0.12 +2015,10,2015.7917,398.49,401.88,28,0.56,0.2 +2015,11,2015.875,400.27,402.22,25,0.58,0.22 +2015,12,2015.9583,402.06,402.72,30,0.67,0.23 +2016,1,2016.0417,402.73,402.46,27,0.56,0.21 +2016,2,2016.125,404.25,403.41,25,1.11,0.43 +2016,3,2016.2083,405.06,403.55,28,0.81,0.29 +2016,4,2016.2917,407.6,404.78,23,1.04,0.41 +2016,5,2016.375,407.9,404.42,29,0.5,0.18 +2016,6,2016.4583,406.99,404.59,26,0.6,0.23 +2016,7,2016.5417,404.59,404.23,28,0.88,0.32 +2016,8,2016.625,402.45,404.39,24,0.6,0.23 +2016,9,2016.7083,401.23,404.84,25,0.44,0.17 +2016,10,2016.7917,401.79,405.22,29,0.3,0.11 +2016,11,2016.875,403.72,405.73,27,0.72,0.26 +2016,12,2016.9583,404.64,405.33,29,0.44,0.16 +2017,1,2017.0417,406.36,406.05,27,0.68,0.25 +2017,2,2017.125,406.66,405.82,26,0.71,0.27 
+2017,3,2017.2083,407.54,406.06,24,1.03,0.4 +2017,4,2017.2917,409.22,406.38,26,0.86,0.32 +2017,5,2017.375,409.89,406.38,27,0.57,0.21 +2017,6,2017.4583,409.08,406.69,26,0.54,0.2 +2017,7,2017.5417,407.33,407,28,0.61,0.22 +2017,8,2017.625,405.32,407.29,29,0.32,0.12 +2017,9,2017.7083,403.57,407.16,26,0.37,0.14 +2017,10,2017.7917,403.82,407.21,27,0.3,0.11 +2017,11,2017.875,405.31,407.34,26,0.41,0.15 +2017,12,2017.9583,407,407.71,31,0.57,0.2 +2018,1,2018.0417,408.15,407.89,29,0.55,0.19 +2018,2,2018.125,408.52,407.65,28,0.52,0.19 +2018,3,2018.2083,409.59,408.09,29,0.65,0.23 +2018,4,2018.2917,410.45,407.65,21,0.9,0.38 +2018,5,2018.375,411.44,407.94,24,0.86,0.33 +2018,6,2018.4583,410.99,408.59,29,0.61,0.22 +2018,7,2018.5417,408.9,408.55,27,0.46,0.17 +2018,8,2018.625,407.16,409.07,31,0.28,0.1 +2018,9,2018.7083,405.71,409.28,29,0.45,0.16 +2018,10,2018.7917,406.19,409.61,30,0.32,0.11 +2018,11,2018.875,408.21,410.24,24,0.56,0.22 +2018,12,2018.9583,409.27,410.01,30,0.5,0.17 +2019,1,2019.0417,411.03,410.78,26,1.26,0.47 +2019,2,2019.125,411.96,411.09,27,1.14,0.42 +2019,3,2019.2083,412.18,410.68,28,1.12,0.4 +2019,4,2019.2917,413.54,410.74,27,0.6,0.22 +2019,5,2019.375,414.86,411.37,28,0.5,0.18 +2019,6,2019.4583,414.16,411.76,27,0.36,0.13 +2019,7,2019.5417,411.97,411.62,25,0.82,0.31 +2019,8,2019.625,410.18,412.09,29,0.33,0.12 +2019,9,2019.7083,408.79,412.36,29,0.35,0.13 +2019,10,2019.7917,408.75,412.17,29,0.31,0.11 +2019,11,2019.875,410.48,412.5,26,0.4,0.15 +2019,12,2019.9583,411.98,412.72,31,0.4,0.14 +2020,1,2020.0417,413.61,413.35,29,0.73,0.26 +2020,2,2020.125,414.34,413.47,28,0.69,0.25 +2020,3,2020.2083,414.74,413.24,26,0.33,0.12 +2020,4,2020.2917,416.45,413.65,28,0.65,0.24 +2020,5,2020.375,417.31,413.81,27,0.61,0.23 +2020,6,2020.4583,416.62,414.22,27,0.45,0.16 +2020,7,2020.5417,414.61,414.26,30,0.57,0.2 +2020,8,2020.625,412.78,414.69,25,0.25,0.1 +2020,9,2020.7083,411.52,415.1,29,0.31,0.11 +2020,10,2020.7917,411.51,414.92,30,0.22,0.08 +2020,11,2020.875,413.11,415.14,27,0.8,0.29 
+2020,12,2020.9583,414.25,415,30,0.48,0.17 +2021,1,2021.0417,415.52,415.26,29,0.44,0.16 +2021,2,2021.125,416.75,415.88,28,1.01,0.36 diff --git a/pandas/tests/io/data/rdata/ppm_df.rds b/pandas/tests/io/data/rdata/ppm_df.rds new file mode 100644 index 0000000000000..242a3e2b11236 Binary files /dev/null and b/pandas/tests/io/data/rdata/ppm_df.rds differ diff --git a/pandas/tests/io/data/rdata/ppm_ts.rds b/pandas/tests/io/data/rdata/ppm_ts.rds new file mode 100644 index 0000000000000..3f49b7d24f6b0 Binary files /dev/null and b/pandas/tests/io/data/rdata/ppm_ts.rds differ diff --git a/pandas/tests/io/data/rdata/sea_ice_df.rds b/pandas/tests/io/data/rdata/sea_ice_df.rds new file mode 100644 index 0000000000000..23229ca9a87db Binary files /dev/null and b/pandas/tests/io/data/rdata/sea_ice_df.rds differ diff --git a/pandas/tests/io/data/rdata/species_mtx.rds b/pandas/tests/io/data/rdata/species_mtx.rds new file mode 100644 index 0000000000000..aa9ebe379e50a Binary files /dev/null and b/pandas/tests/io/data/rdata/species_mtx.rds differ diff --git a/pandas/tests/io/test_rdata.py b/pandas/tests/io/test_rdata.py new file mode 100644 index 0000000000000..9d552caa7ab4c --- /dev/null +++ b/pandas/tests/io/test_rdata.py @@ -0,0 +1,529 @@ +from io import BytesIO +import os +from urllib.error import HTTPError + +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.rdata import read_rdata + +pyreadr = pytest.importorskip("pyreadr") + + +ghg_df = DataFrame( + { + "gas": { + 141: "Carbon dioxide", + 142: "Methane", + 143: "Nitrous oxide", + 144: "Fluorinated gases", + 145: "Total", + }, + "year": {141: 2018, 142: 2018, 143: 2018, 144: 2018, 145: 2018}, + "emissions": { + 141: 5424.881502132882, + 142: 634.4571270782675, + 143: 434.52855537666636, + 144: 182.78243246177678, + 145: 6676.649617049592, + }, + } +).rename_axis("rownames") + +plants_df = DataFrame( + { + "plant_group": { + 16: 
"Pteridophytes", + 17: "Pteridophytes", + 18: "Pteridophytes", + 19: "Pteridophytes", + 20: "Pteridophytes", + }, + "status": { + 16: "Data Deficient", + 17: "Extinct", + 18: "Not Threatened", + 19: "Possibly Threatened", + 20: "Threatened", + }, + "count": {16: 398, 17: 65, 18: 1294, 19: 408, 20: 1275}, + } +).rename_axis("rownames") + +sea_ice_df = DataFrame( + { + "year": {1012: 2016, 1013: 2017, 1014: 2018, 1015: 2019, 1016: 2020}, + "mo": {1012: 12, 1013: 12, 1014: 12, 1015: 12, 1016: 12}, + "data.type": { + 1012: "Goddard", + 1013: "Goddard", + 1014: "Goddard", + 1015: "Goddard", + 1016: "NRTSI-G", + }, + "region": {1012: "S", 1013: "S", 1014: "S", 1015: "S", 1016: "S"}, + "extent": {1012: 8.28, 1013: 9.48, 1014: 9.19, 1015: 9.41, 1016: 10.44}, + "area": {1012: 5.51, 1013: 6.23, 1014: 5.59, 1015: 6.59, 1016: 6.5}, + } +).rename_axis("rownames") + + +@pytest.fixture(params=["rda", "rds"]) +def rtype(request): + return request.param + + +@pytest.fixture(params=[None, "gzip", "bz2", "xz"]) +def comp(request): + return request.param + + +def adj_int(df): + """ + Convert int32 columns to int64. + + Since pyreadr engine reads ints int int32 and writes ints + to floats this method converts such columns for testing. 
+ """ + int_cols = df.select_dtypes("int32").columns + df[int_cols] = df[int_cols].astype("int64") + + if "index" in df.columns: + df["index"] = df["index"].astype("int64") + + if "year" in df.columns: + df["year"] = df["year"].astype("int64") + if "mo" in df.columns: + df["mo"] = df["mo"].astype("int64") + + return df + + +# RDA READER + +# PATH_OR_BUFFER + + +def test_read_rds_file(datapath): + filename = datapath("io", "data", "rdata", "ghg_df.rds") + r_df = read_rdata(filename) + output = adj_int(r_df).tail() + + tm.assert_frame_equal(ghg_df, output) + + +def test_read_rda_file(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + r_dfs = read_rdata(filename) + + r_dfs = {str(k): adj_int(v) for k, v in r_dfs.items()} + + assert list(r_dfs.keys()) == ["ghg_df", "plants_df", "sea_ice_df"] + + tm.assert_frame_equal(ghg_df, r_dfs["ghg_df"].tail()) + tm.assert_frame_equal(plants_df, r_dfs["plants_df"].tail()) + tm.assert_frame_equal(sea_ice_df, r_dfs["sea_ice_df"].tail()) + + +def test_read_rds_filelike(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") + + with open(filename, "rb") as f: + r_df = read_rdata(f, file_format="rds") + + output = adj_int(r_df).tail() + + tm.assert_frame_equal(sea_ice_df, output) + + +def test_read_rda_filelike(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + + with open(filename, "rb") as f: + r_dfs = read_rdata(f, file_format="rda") + + r_dfs = {str(k): adj_int(v) for k, v in r_dfs.items()} + + assert list(r_dfs.keys()) == ["ghg_df", "plants_df", "sea_ice_df"] + + tm.assert_frame_equal(ghg_df, r_dfs["ghg_df"].tail()) + tm.assert_frame_equal(plants_df, r_dfs["plants_df"].tail()) + tm.assert_frame_equal(sea_ice_df, r_dfs["sea_ice_df"].tail()) + + +def test_bytesio_rds(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") + + with open(filename, "rb") as f: + with BytesIO(f.read()) as b_io: + r_df = read_rdata(b_io, file_format="rds") + + 
output = adj_int(r_df).tail() + + tm.assert_frame_equal(sea_ice_df, output) + + +def test_bytesio_rda(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + + with open(filename, "rb") as f: + with BytesIO(f.read()) as b_io: + r_dfs = read_rdata(b_io, file_format="rda") + + r_dfs = {str(k): adj_int(v) for k, v in r_dfs.items()} + + assert list(r_dfs.keys()) == ["ghg_df", "plants_df", "sea_ice_df"] + + tm.assert_frame_equal(ghg_df, r_dfs["ghg_df"].tail()) + tm.assert_frame_equal(plants_df, r_dfs["plants_df"].tail()) + tm.assert_frame_equal(sea_ice_df, r_dfs["sea_ice_df"].tail()) + + +# FILE FORMAT + + +def test_read_wrong_format(datapath): + with pytest.raises(ValueError, match="not a valid value for file_format"): + filename = datapath("io", "data", "rdata", "plants_df.rds") + read_rdata(filename, file_format="r") + + +def test_read_wrong_file(): + with pytest.raises(FileNotFoundError, match="file cannot be found"): + filename = os.path.join("data", "rdata", "plants_df.rda") + read_rdata(filename) + + +def test_read_rds_non_df(datapath): + from pyreadr import custom_errors + + with pytest.raises( + custom_errors.LibrdataError, + match="Invalid file, or file has unsupported features", + ): + filename = datapath("io", "data", "rdata", "ppm_ts.rds") + read_rdata(filename) + + +def test_read_rda_non_dfs(datapath): + from pyreadr import custom_errors + + with pytest.raises( + custom_errors.LibrdataError, + match="Invalid file, or file has unsupported features", + ): + filename = datapath("io", "data", "rdata", "env_data_non_dfs.rda") + read_rdata(filename) + + +def test_read_not_rda_file(datapath): + from pyreadr import custom_errors + + with pytest.raises( + custom_errors.LibrdataError, match="The file contains an unrecognized object" + ): + filename = datapath("io", "data", "rdata", "ppm_df.csv") + read_rdata(filename, file_format="rda") + + +def test_bytes_read_infer_rds(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") 
+ + with pytest.raises(ValueError, match="Unable to infer file format from file name"): + with open(filename, "rb") as f: + read_rdata(f) + + +def test_bytes_read_infer_rda(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + + with pytest.raises(ValueError, match="Unable to infer file format from file name"): + with open(filename, "rb") as f: + read_rdata(f) + + +# URL + + +@tm.network +def test_read_rda_url(): + url_df = DataFrame( + { + "carrier": {1: "9E", 2: "AA", 3: "AS", 4: "B6", 5: "DL"}, + "name": { + 1: "Endeavor Air Inc.", + 2: "American Airlines Inc.", + 3: "Alaska Airlines Inc.", + 4: "JetBlue Airways", + 5: "Delta Air Lines Inc.", + }, + } + ).rename_axis("rownames") + + url = ( + "https://github.com/hadley/nycflights13/blob/master/data/airlines.rda?raw=true" + ) + r_dfs = read_rdata(url, file_format="rda") + + tm.assert_frame_equal(url_df, r_dfs["airlines"].head()) + + +@tm.network +def test_read_unable_infer_format(): + with pytest.raises(ValueError, match="Unable to infer file format from file name"): + url = ( + "https://github.com/hadley/nycflights13/" + "blob/master/data/airlines.rda?raw=true" + ) + read_rdata(url) + + +@tm.network +def test_read_wrong_url(): + with pytest.raises(HTTPError, match="HTTP Error 404: Not Found"): + url = "https://example.com/data.rdata" + read_rdata(url) + + +# S3 + + +@tm.network +@td.skip_if_no("s3fs") +def test_read_rda_s3(): + s3 = "s3://assets.datacamp.com/production/course_1478/datasets/wine.RData" + s3_df = DataFrame( + { + "Alcohol": {1: 13.2, 2: 13.16, 3: 14.37, 4: 13.24, 5: 14.2}, + "Malic.acid": {1: 1.78, 2: 2.36, 3: 1.95, 4: 2.59, 5: 1.76}, + "Ash": {1: 2.14, 2: 2.67, 3: 2.5, 4: 2.87, 5: 2.45}, + "Alcalinity.of.ash": {1: 11.2, 2: 18.6, 3: 16.8, 4: 21.0, 5: 15.2}, + "Magnesium": {1: 100, 2: 101, 3: 113, 4: 118, 5: 112}, + "Total.phenols": {1: 2.65, 2: 2.8, 3: 3.85, 4: 2.8, 5: 3.27}, + "Flavanoids": {1: 2.76, 2: 3.24, 3: 3.49, 4: 2.69, 5: 3.39}, + "Nonflavanoid.phenols": {1: 0.26, 
2: 0.3, 3: 0.24, 4: 0.39, 5: 0.34}, + "Proanthocyanins": {1: 1.28, 2: 2.81, 3: 2.18, 4: 1.82, 5: 1.97}, + "Color.intensity": {1: 4.38, 2: 5.68, 3: 7.8, 4: 4.32, 5: 6.75}, + "Hue": {1: 3.4, 2: 3.17, 3: 3.45, 4: 2.93, 5: 2.85}, + "Proline": {1: 1050, 2: 1185, 3: 1480, 4: 735, 5: 1450}, + } + ).rename_axis("rownames") + r_dfs = read_rdata(s3) + r_dfs["wine"] = adj_int(r_dfs["wine"]) + + # pyreadr remove dots in colnames + r_dfs["wine"].columns = r_dfs["wine"].columns.str.replace(" ", ".") + + tm.assert_frame_equal(s3_df, r_dfs["wine"].head()) + + +# ENGINE + + +def test_read_rds_df_output(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") + r_df = read_rdata(filename) + + assert isinstance(r_df, DataFrame) + + +def test_read_rda_dict_output(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + r_dfs = read_rdata(filename) + + assert isinstance(r_dfs, dict) + assert list(r_dfs.keys()) == ["ghg_df", "plants_df", "sea_ice_df"] + + +# SELECT_FRAMES + + +def test_read_select_frames_rda_dfs(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + r_dfs = read_rdata(filename, select_frames=["ghg_df", "sea_ice_df"]) + + assert "plants_df" not in list(r_dfs.keys()) + assert "ghg_df" in list(r_dfs.keys()) + assert "sea_ice_df" in list(r_dfs.keys()) + + +def test_read_wrong_select_frames(datapath): + with pytest.raises(TypeError, match="not a valid type for select_frames"): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + read_rdata(filename, select_frames="plants_df") + + +# ROWNAMES + + +def test_read_rownames_true_rds(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") + r_df = read_rdata(filename, rownames=True) + + if isinstance(r_df, DataFrame): + assert r_df.index.name == "rownames" + + +def test_read_rownames_false_rds(datapath): + filename = datapath("io", "data", "rdata", "sea_ice_df.rds") + r_df = read_rdata(filename, rownames=False) + + if isinstance(r_df, 
DataFrame): + assert r_df.index.name != "rownames" + + +def test_read_rownames_true_rda(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + r_dfs = read_rdata(filename, rownames=True) + + assert r_dfs["ghg_df"].index.name == "rownames" + assert r_dfs["plants_df"].index.name == "rownames" + assert r_dfs["sea_ice_df"].index.name == "rownames" + + +def test_read_rownames_false_rda(datapath): + filename = datapath("io", "data", "rdata", "env_data_dfs.rda") + r_dfs = read_rdata(filename, rownames=False) + + assert r_dfs["ghg_df"].index.name != "rownames" + assert r_dfs["plants_df"].index.name != "rownames" + assert r_dfs["sea_ice_df"].index.name != "rownames" + + +# ENCODING + + +def test_non_utf8_data(datapath, rtype): + filename = datapath("io", "data", "rdata", f"climate_non_utf8_df.{rtype}") + with pytest.raises(UnicodeDecodeError, match=("'utf-8' codec can't decode byte")): + read_rdata(filename) + + +# RDA WRITER + +# PATH_OR_BUFFER + + +def test_write_read_file(rtype): + with tm.ensure_clean("test.out") as path: + ghg_df.to_rdata(path, file_format=rtype, index=False) + r_dfs = read_rdata(path, file_format=rtype, rownames=False) + + expected = ghg_df.reset_index(drop=True) + output = ( + adj_int(r_dfs["pandas_dataframe"]) if rtype == "rda" else adj_int(r_dfs) + ) + + tm.assert_frame_equal(output, expected) + + +def test_write_read_pathlib(rtype): + from pathlib import Path + + with tm.ensure_clean_dir() as tmp_dir: + tmp_file = Path(tmp_dir).joinpath("test.out") + sea_ice_df.to_rdata(tmp_file, file_format=rtype, index=False) + r_dfs = read_rdata(tmp_file, file_format=rtype, rownames=False) + + expected = sea_ice_df.reset_index(drop=True) + output = ( + adj_int(r_dfs["pandas_dataframe"]) if rtype == "rda" else adj_int(r_dfs) + ) + + tm.assert_frame_equal(output, expected) + + +def test_write_read_filelike(rtype): + with BytesIO() as b_io: + sea_ice_df.to_rdata(b_io, file_format=rtype, index=False) + r_dfs = read_rdata( + b_io.getvalue(), + 
file_format=rtype, + rownames=False, + ) + + expected = sea_ice_df.reset_index(drop=True) + output = ( + adj_int(r_dfs["pandas_dataframe"]) if rtype == "rda" else adj_int(r_dfs) + ) + + tm.assert_frame_equal(output, expected) + + +# FILE FORMAT + + +def test_write_wrong_format(): + with tm.ensure_clean("test.rda") as path: + with pytest.raises(ValueError, match=("not a valid value for file_format")): + ghg_df.to_rdata(path, file_format="csv") + + +def test_write_unable_to_infer(): + with tm.ensure_clean("test") as path: + with pytest.raises( + ValueError, match=("Unable to infer file format from file name") + ): + ghg_df.to_rdata(path) + + +# INDEX + + +def test_index_true(rtype): + with tm.ensure_clean("test.out") as path: + plants_df.rename_axis(None).to_rdata(path, file_format=rtype, index=True) + r_dfs = read_rdata(path, file_format=rtype) + + r_df = r_dfs if rtype == "rds" else r_dfs["pandas_dataframe"] + + if isinstance(r_df, DataFrame): + assert "index" in r_df.columns + + +def test_index_false(rtype): + with tm.ensure_clean("test.out") as path: + plants_df.rename_axis(None).to_rdata(path, file_format=rtype, index=False) + r_dfs = read_rdata(path, file_format=rtype) + + r_df = r_dfs if rtype == "rds" else r_dfs["pandas_dataframe"] + + if isinstance(r_df, DataFrame): + assert "index" not in r_df.columns + + +# COMPRESS + + +def test_compress_ok_comp(rtype, comp): + with tm.ensure_clean("test.out") as path: + ghg_df.to_rdata(path, file_format=rtype, compression=comp, index=False) + r_dfs = read_rdata(path, file_format=rtype, rownames=False) + + expected = ghg_df.reset_index(drop=True) + output = ( + adj_int(r_dfs["pandas_dataframe"]) if rtype == "rda" else adj_int(r_dfs) + ) + + tm.assert_frame_equal(output, expected) + + +def test_compress_zip(rtype): + with tm.ensure_clean("test.out") as path: + with pytest.raises(ValueError, match=("not a supported value for compression")): + ghg_df.to_rdata(path, file_format=rtype, index=False, compression="zip") + + +# 
RDA_NAMES + + +def test_new_rda_name(): + with tm.ensure_clean("test.rda") as path: + ghg_df.to_rdata(path, rda_name="py_df") + r_dfs = read_rdata(path) + + assert "py_df" in list(r_dfs.keys()) diff --git a/requirements-dev.txt b/requirements-dev.txt index 33deeef9f1f82..55e6e8aa25c5b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -76,6 +76,7 @@ sqlalchemy xarray cftime pyreadstat +pyreadr tabulate>=0.8.3 natsort git+https://github.com/pydata/pydata-sphinx-theme.git@master