From b95d3e5ab0823f4e9aa4566ecd50172363a49f3f Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Mon, 23 Dec 2019 02:15:13 +0700 Subject: [PATCH 1/4] CLN: str.format -> f-strings for `io/sas` --- pandas/io/sas/sas7bdat.py | 27 +++++++++++---------------- pandas/io/sas/sas_xport.py | 4 ++-- pandas/tests/io/sas/test_sas7bdat.py | 16 ++++++++-------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index eb57d703cd4d5..3d93c293893fc 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -169,7 +169,7 @@ def _get_properties(self): if buf in const.encoding_names: self.file_encoding = const.encoding_names[buf] else: - self.file_encoding = "unknown (code={name!s})".format(name=buf) + self.file_encoding = f"unknown (code={str(buf)})" # Get platform information buf = self._read_bytes(const.platform_offset, const.platform_length) @@ -293,8 +293,8 @@ def _read_bytes(self, offset, length): buf = self._path_or_buf.read(length) if len(buf) < length: self.close() - msg = "Unable to read {:d} bytes from file position {:d}." - raise ValueError(msg.format(length, offset)) + msg = f"Unable to read {length:d} bytes from file position {offset:d}." + raise ValueError(msg) return buf else: if offset + length > len(self._cached_page): @@ -456,14 +456,9 @@ def _process_columnsize_subheader(self, offset, length): offset += int_len self.column_count = self._read_int(offset, int_len) if self.col_count_p1 + self.col_count_p2 != self.column_count: - print( - "Warning: column count mismatch ({p1} + {p2} != " - "{column_count})\n".format( - p1=self.col_count_p1, - p2=self.col_count_p2, - column_count=self.column_count, - ) - ) + print(f"Warning: column count mismatch ({self.col_count_p1} + " + f"{self.col_count_p2} != " + f"{self.column_count})\n") # Unknown purpose def _process_subheader_counts(self, offset, length): @@ -672,8 +667,10 @@ def _read_next_page(self): return True elif len(self._cached_page) != self._page_length: self.close() - msg = "failed to read complete page from file (read {:d} of {:d} bytes)" - raise ValueError(msg.format(len(self._cached_page), self._page_length)) + msg = ("failed to read complete page from file (read " + f"{len(self._cached_page):d} of " + f"{self._page_length:d} bytes)") + raise ValueError(msg) self._read_page_header() page_type = self._current_page_type @@ -725,8 +722,6 @@ def _chunk_to_dataframe(self): js += 1 else: self.close() - raise ValueError( - "unknown column type {type}".format(type=self._column_types[j]) - ) + raise ValueError(f"unknown column type {self._column_types[j]}") return rslt diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 9aa8ed1dfeb5d..777d38be04775 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -367,8 +367,8 @@ def _read_header(self): fl = field["field_length"] if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): self.close() - msg = "Floating field width {0} is not between 2 and 8." 
- raise TypeError(msg.format(fl)) + msg = "f{Floating field width {fl} is not between 2 and 8.}" + raise TypeError(msg) for k, v in field.items(): try: diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 49af18d2935ef..339b62fb9252b 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -21,7 +21,7 @@ def setup_method(self, datapath): self.data = [] self.test_ix = [list(range(1, 16)), [16]] for j in 1, 2: - fname = os.path.join(self.dirpath, "test_sas7bdat_{j}.csv".format(j=j)) + fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") df = pd.read_csv(fname) epoch = pd.datetime(1960, 1, 1) t1 = pd.to_timedelta(df["Column4"], unit="d") @@ -38,7 +38,7 @@ def test_from_file(self): for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") df = pd.read_sas(fname, encoding="utf-8") tm.assert_frame_equal(df, df0) @@ -46,7 +46,7 @@ def test_from_buffer(self): for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") with open(fname, "rb") as f: byts = f.read() buf = io.BytesIO(byts) @@ -61,7 +61,7 @@ def test_from_iterator(self): for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") rdr = pd.read_sas(fname, iterator=True, encoding="utf-8") df = rdr.read(2) tm.assert_frame_equal(df, df0.iloc[0:2, :]) @@ -73,7 +73,7 @@ def test_path_pathlib(self): for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: - fname = Path(os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k))) + fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) df = pd.read_sas(fname, encoding="utf-8") tm.assert_frame_equal(df, df0) @@ -85,7 +85,7 @@ def test_path_localpath(self): df0 = self.data[j] for k in self.test_ix[j]: fname = LocalPath( - os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + os.path.join(self.dirpath, f"test{k}.sas7bdat") ) df = pd.read_sas(fname, encoding="utf-8") tm.assert_frame_equal(df, df0) @@ -95,7 +95,7 @@ def test_iterator_loop(self): for j in 0, 1: for k in self.test_ix[j]: for chunksize in 3, 5, 10, 11: - fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") rdr = pd.read_sas(fname, chunksize=10, encoding="utf-8") y = 0 for x in rdr: @@ -106,7 +106,7 @@ def test_iterator_loop(self): def test_iterator_read_too_much(self): # github #14734 k = self.test_ix[0][0] - fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k)) + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") rdr = pd.read_sas(fname, format="sas7bdat", iterator=True, encoding="utf-8") d1 = rdr.read(rdr.row_count + 20) rdr.close() From cdbd94fe413d81a46cf647f89214e005e3f99db3 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Mon, 23 Dec 2019 02:35:49 +0700 Subject: [PATCH 2/4] Apply black style --- pandas/io/sas/sas7bdat.py | 16 ++++++++++------ pandas/tests/io/sas/test_sas7bdat.py | 4 +--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 3d93c293893fc..9767da5983887 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -456,9 +456,11 @@ def 
_process_columnsize_subheader(self, offset, length): offset += int_len self.column_count = self._read_int(offset, int_len) if self.col_count_p1 + self.col_count_p2 != self.column_count: - print(f"Warning: column count mismatch ({self.col_count_p1} + " - f"{self.col_count_p2} != " - f"{self.column_count})\n") + print( + f"Warning: column count mismatch ({self.col_count_p1} + " + f"{self.col_count_p2} != " + f"{self.column_count})\n" + ) # Unknown purpose def _process_subheader_counts(self, offset, length): @@ -667,9 +669,11 @@ def _read_next_page(self): return True elif len(self._cached_page) != self._page_length: self.close() - msg = ("failed to read complete page from file (read " - f"{len(self._cached_page):d} of " - f"{self._page_length:d} bytes)") + msg = ( + "failed to read complete page from file (read " + f"{len(self._cached_page):d} of " + f"{self._page_length:d} bytes)" + ) raise ValueError(msg) self._read_page_header() diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 339b62fb9252b..d3480b246b91f 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -84,9 +84,7 @@ def test_path_localpath(self): for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: - fname = LocalPath( - os.path.join(self.dirpath, f"test{k}.sas7bdat") - ) + fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) df = pd.read_sas(fname, encoding="utf-8") tm.assert_frame_equal(df, df0) From 0916a04c1a6a69beda665bd17e621498cbf6ea2f Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Mon, 23 Dec 2019 21:38:21 +0700 Subject: [PATCH 3/4] Fix syntax error --- pandas/io/sas/sas_xport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 777d38be04775..e4a06c794271d 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -367,7 +367,7 @@ def _read_header(self): fl = field["field_length"] if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): self.close() - msg = "f{Floating field width {fl} is not between 2 and 8.}" + msg = f"Floating field width {fl} is not between 2 and 8." raise TypeError(msg) for k, v in field.items(): From 0730a0c1a9094f0f595fea5e225579fc84507cf6 Mon Sep 17 00:00:00 2001 From: Jethro Cao Date: Tue, 24 Dec 2019 02:12:46 +0700 Subject: [PATCH 4/4] Remove `str()` call --- pandas/io/sas/sas7bdat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 9767da5983887..c6a28c1fa813d 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -169,7 +169,7 @@ def _get_properties(self): if buf in const.encoding_names: self.file_encoding = const.encoding_names[buf] else: - self.file_encoding = f"unknown (code={str(buf)})" + self.file_encoding = f"unknown (code={buf})" # Get platform information buf = self._read_bytes(const.platform_offset, const.platform_length)
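
Note on the change set as a whole -- a minimal, standalone sketch (not part of any
patch above) showing that the two formatting idioms touched here are interchangeable.
The `buf`, `length` and `offset` values below are made-up stand-ins, not values read
from a real sas7bdat file:

    # PATCH 4/4: with an empty format spec, f-string interpolation renders the same
    # text as str(), so the explicit str() call is redundant.
    buf = 20  # hypothetical unrecognized encoding code
    assert f"unknown (code={str(buf)})" == f"unknown (code={buf})" == "unknown (code=20)"

    # PATCH 1/4: f-string format specs use the same mini-language as str.format(),
    # so "{:d}".format(...) and f"{...:d}" produce identical output.
    length, offset = 336, 0  # hypothetical read size and file position
    old = "Unable to read {:d} bytes from file position {:d}.".format(length, offset)
    new = f"Unable to read {length:d} bytes from file position {offset:d}."
    assert old == new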