From 292ed83dd07e8405d8104fc9772fc041cff08d91 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Sep 2022 14:55:17 +0200 Subject: [PATCH 1/3] PERF: Avoid fragmentation of DataFrame in read_sas --- pandas/io/sas/sas_xport.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 648c58dee6600..a5df2bde38096 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -481,7 +481,7 @@ def read(self, nrows: int | None = None) -> pd.DataFrame: raw = self.filepath_or_buffer.read(read_len) data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) - df = pd.DataFrame(index=range(read_lines)) + df_data = {} for j, x in enumerate(self.columns): vec = data["s" + str(j)] ntype = self.fields[j]["ntype"] @@ -496,7 +496,8 @@ def read(self, nrows: int | None = None) -> pd.DataFrame: if self._encoding is not None: v = [y.decode(self._encoding) for y in v] - df[x] = v + df_data.update({x: v}) + df = pd.DataFrame(df_data) if self._index is None: df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines)) From cf3185af174ba1cc3ebefe1ba2d29a82e0cbfd5c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Sep 2022 14:56:37 +0200 Subject: [PATCH 2/3] Add whatsnew --- doc/source/whatsnew/v1.6.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 405b8cc0a5ded..40fecceee2e71 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -182,7 +182,7 @@ MultiIndex I/O ^^^ -- +- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` (:issue:`48595`) - Period From 347876e4d20cb27bc7accf56f62f80ebeb9f7d2b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 20 Sep 2022 18:53:55 +0200 Subject: [PATCH 3/3] Add warning --- doc/source/whatsnew/v1.6.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 40fecceee2e71..cc71623a6188f 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -182,7 +182,7 @@ MultiIndex I/O ^^^ -- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` (:issue:`48595`) +- Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) - Period