From 7827ede6b406e1edea5764a911e2891cb276f725 Mon Sep 17 00:00:00 2001 From: Vitor Serpa <83547959+vrserpa@users.noreply.github.com> Date: Mon, 21 Jun 2021 15:03:07 -0300 Subject: [PATCH 1/2] PERF: Use list comprehension to join strings (#41753) --- asv_bench/benchmarks/io/csv.py | 8 ++++++-- doc/source/user_guide/io.rst | 2 +- pandas/_config/config.py | 2 +- pandas/core/computation/engines.py | 2 +- pandas/core/computation/parsing.py | 2 +- pandas/core/computation/pytables.py | 2 +- pandas/core/computation/scope.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/io/formats/excel.py | 2 +- pandas/io/formats/latex.py | 8 ++++---- pandas/io/formats/style.py | 2 +- pandas/io/formats/xml.py | 16 ++++++++-------- pandas/io/html.py | 4 ++-- pandas/io/parsers/base_parser.py | 4 ++-- pandas/io/parsers/python_parser.py | 2 +- pandas/io/pytables.py | 10 +++++----- pandas/io/sql.py | 6 +++--- pandas/io/stata.py | 4 ++-- pandas/plotting/_core.py | 2 +- pandas/plotting/_matplotlib/core.py | 2 +- pandas/tests/generic/test_frame.py | 2 +- pandas/tests/generic/test_series.py | 2 +- pandas/tests/indexes/period/test_indexing.py | 5 ++--- pandas/tests/io/formats/style/test_align.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 10 +++++----- pandas/tests/io/parser/test_multi_thread.py | 2 +- pandas/tests/io/parser/test_na_values.py | 2 +- pandas/tests/io/parser/test_skiprows.py | 4 ++-- pandas/tests/io/test_sql.py | 2 +- pandas/util/_decorators.py | 12 +++++++----- pandas/util/_print_versions.py | 2 +- pandas/util/version/__init__.py | 8 ++++---- scripts/tests/test_validate_docstrings.py | 2 +- 35 files changed, 74 insertions(+), 69 deletions(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 5ff9431fbf8e4..c10dd8c7762ca 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -291,7 +291,11 @@ class ReadCSVFloatPrecision(StringIORewind): def setup(self, sep, decimal, float_precision): floats = [ - "".join(random.choice(string.digits) for _ in range(28)) for _ in range(15) + "".join( + [ + random.choice(string.digits) for _ in range(28) + ] + ) for _ in range(15) ] rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n" data = rows * 5 @@ -395,7 +399,7 @@ class ReadCSVCachedParseDates(StringIORewind): param_names = ["do_cache", "engine"] def setup(self, do_cache, engine): - data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10 + data = ("\n".join([f"10/{year}" for year in range(2000, 2100)]) + "\n") * 10 self.StringIO_input = StringIO(data) def time_read_csv_cached(self, do_cache, engine): diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c2b030d732ba9..22aff1d8b3829 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5689,7 +5689,7 @@ Example of a callable using PostgreSQL `COPY clause writer.writerows(data_iter) s_buf.seek(0) - columns = ', '.join('"{}"'.format(k) for k in keys) + columns = ', '.join(['"{}"'.format(k) for k in keys]) if table.schema: table_name = '{}.{}'.format(table.schema, table.name) else: diff --git a/pandas/_config/config.py b/pandas/_config/config.py index be3498dc0829b..ed48ff7ae08c6 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -157,7 +157,7 @@ def _describe_option(pat: str = "", _print_desc: bool = True): if len(keys) == 0: raise OptionError("No such keys(s)") - s = "\n".join(_build_option_description(k) for k in keys) + s = 
"\n".join([_build_option_description(k) for k in keys]) if _print_desc: print(s) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 62732402dbeea..ec3548c9efc6c 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -37,7 +37,7 @@ def _check_ne_builtin_clash(expr: Expr) -> None: overlap = names & _ne_builtins if overlap: - s = ", ".join(repr(x) for x in overlap) + s = ", ".join([repr(x) for x in overlap]) raise NumExprClobberingError( f'Variables in expression "{expr}" overlap with builtins: ({s})' ) diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index b0f817d2c1ff3..5e000116d19f2 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -57,7 +57,7 @@ def create_valid_python_identifier(name: str) -> str: } ) - name = "".join(special_characters_replacements.get(char, char) for char in name) + name = "".join([special_characters_replacements.get(char, char) for char in name]) name = "BACKTICK_QUOTED_STRING_" + name if not name.isidentifier(): diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index f733a5c43dfb3..8b0678a7b1494 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -579,7 +579,7 @@ def __init__( else: w = _validate_where(w) where[idx] = w - _where = " & ".join(f"({w})" for w in com.flatten(where)) + _where = " & ".join([f"({w})" for w in com.flatten(where)]) else: # _validate_where ensures we otherwise have a string _where = where diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 09067e7eba6e5..426cd8fd81f28 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -50,7 +50,7 @@ def _raw_hex_id(obj) -> str: """Return the padded hexadecimal id of ``obj``.""" # interpret as a pointer since that's what really what id returns packed = struct.pack("@P", id(obj)) - return "".join(_replacer(x) for x in packed) + return "".join([_replacer(x) for x in packed]) DEFAULT_GLOBALS = { diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5bd845534fc96..fe9243b00b0ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11036,7 +11036,7 @@ def last_valid_index(self) -> Hashable | None: def _doc_params(cls): """Return a tuple of the doc params.""" axis_descr = ( - f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}" + f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" ) name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" name2 = cls.__name__ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 237d06402a0ee..a1eb542e3362c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -281,7 +281,7 @@ def __repr__(self) -> str: result = f"{name}: {len(self)} dtype: {self.dtype}" else: - shape = " x ".join(str(s) for s in self.shape) + shape = " x ".join([str(s) for s in self.shape]) result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" return result diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index b285fa5f315ed..0c625e8a68db0 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -769,7 +769,7 @@ def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]: series = self.df.iloc[:, colidx] for i, val in enumerate(series): if styles is not None: - css = ";".join(a + ":" + str(v) for (a, v) 
in styles[i, colidx]) + css = ";".join([a + ":" + str(v) for (a, v) in styles[i, colidx]]) xlstyle = self.style_converter(css) yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index e9e2b830e32cb..93069a1e2955d 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -358,7 +358,7 @@ def get_result(self) -> str: self.bottom_separator, self.env_end, ] - result = "\n".join(item for item in elements if item) + result = "\n".join([item for item in elements if item]) trailing_newline = "\n" result += trailing_newline return result @@ -527,13 +527,13 @@ def env_begin(self) -> str: f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" ) elements = [first_row, f"{self._caption_and_label()}"] - return "\n".join(item for item in elements if item) + return "\n".join([item for item in elements if item]) def _caption_and_label(self) -> str: if self.caption or self.label: double_backslash = "\\\\" elements = [f"{self._caption_macro}", f"{self._label_macro}"] - caption_and_label = "\n".join(item for item in elements if item) + caption_and_label = "\n".join([item for item in elements if item]) caption_and_label += double_backslash return caption_and_label else: @@ -611,7 +611,7 @@ def env_begin(self) -> str: f"{self._label_macro}", f"\\begin{{tabular}}{{{self.column_format}}}", ] - return "\n".join(item for item in elements if item) + return "\n".join([item for item in elements if item]) @property def bottom_separator(self) -> str: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c03275b565fd4..99f7fe3889ac1 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2018,7 +2018,7 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: >>> df.style.set_properties(color="white", align="right") >>> df.style.set_properties(**{'background-color': 'yellow'}) """ - values = "".join(f"{p}: {v};" for p, v in kwargs.items()) + values = "".join([f"{p}: {v};" for p, v in kwargs.items()]) return self.applymap(lambda x: values, subset=subset) @staticmethod diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 5be6ae0382d87..d2b86cc458b74 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -357,9 +357,9 @@ def build_attribs(self) -> None: flat_col = col if isinstance(col, tuple): flat_col = ( - "".join(str(c) for c in col).strip() + "".join([str(c) for c in col]).strip() if "" in col - else "_".join(str(c) for c in col).strip() + else "_".join([str(c) for c in col]).strip() ) attr_name = f"{self.prefix_uri}{flat_col}" @@ -384,9 +384,9 @@ def build_elems(self) -> None: flat_col = col if isinstance(col, tuple): flat_col = ( - "".join(str(c) for c in col).strip() + "".join([str(c) for c in col]).strip() if "" in col - else "_".join(str(c) for c in col).strip() + else "_".join([str(c) for c in col]).strip() ) elem_name = f"{self.prefix_uri}{flat_col}" @@ -529,9 +529,9 @@ def build_attribs(self) -> None: flat_col = col if isinstance(col, tuple): flat_col = ( - "".join(str(c) for c in col).strip() + "".join([str(c) for c in col]).strip() if "" in col - else "_".join(str(c) for c in col).strip() + else "_".join([str(c) for c in col]).strip() ) attr_name = f"{self.prefix_uri}{flat_col}" @@ -556,9 +556,9 @@ def build_elems(self) -> None: flat_col = col if isinstance(col, tuple): flat_col = ( - "".join(str(c) for c in col).strip() + "".join([str(c) for c in col]).strip() if "" in col - else 
"_".join(str(c) for c in col).strip() + else "_".join([str(c) for c in col]).strip() ) elem_name = f"{self.prefix_uri}{flat_col}" diff --git a/pandas/io/html.py b/pandas/io/html.py index 0a91d065379cb..2947b22f85d61 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -627,7 +627,7 @@ def _build_xpath_expr(attrs) -> str: if "class_" in attrs: attrs["class"] = attrs.pop("class_") - s = " and ".join(f"@{k}={repr(v)}" for k, v in attrs.items()) + s = " and ".join([f"@{k}={repr(v)}" for k, v in attrs.items()]) return f"[{s}]" @@ -861,7 +861,7 @@ def _parser_dispatch(flavor): def _print_as_set(s) -> str: - arg = ", ".join(pprint_thing(el) for el in s) + arg = ", ".join([pprint_thing(el) for el in s]) return f"{{{arg}}}" diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index f914e0601fb89..980c75b370bd5 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -351,7 +351,7 @@ def extract(r): # level, then our header was too long. for n in range(len(columns[0])): if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): - header = ",".join(str(x) for x in self.header) + header = ",".join([str(x) for x in self.header]) raise ParserError( f"Passed header=[{header}] are too many rows " "for this multi_index of columns" @@ -1138,7 +1138,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns): else: colnames.append(c) - new_name = "_".join(str(x) for x in colnames) + new_name = "_".join([str(x) for x in colnames]) to_parse = [data_dict[c] for c in colnames if c in data_dict] new_col = parser(*to_parse) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 13f2d62399418..7c9fcde08bf24 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1159,7 +1159,7 @@ def get_rows(self, infer_nrows, skiprows=None): def detect_colspecs(self, infer_nrows=100, skiprows=None): # Regex escape the delimiters - delimiters = "".join(fr"\{x}" for x in self.delimiter) + delimiters = "".join([fr"\{x}" for x in self.delimiter]) pattern = re.compile(f"([^{delimiters}]+)") rows = self.get_rows(infer_nrows, skiprows) if not rows: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1b4bd62ee7db7..16e4c19701d55 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2642,7 +2642,7 @@ def __repr__(self) -> str: s = self.shape if s is not None: if isinstance(s, (list, tuple)): - jshape = ",".join(pprint_thing(x) for x in s) + jshape = ",".join([pprint_thing(x) for x in s]) s = f"[{jshape}]" return f"{self.pandas_type:12.12} (shape->{s})" return self.pandas_type @@ -3309,10 +3309,10 @@ def __repr__(self) -> str: ver = "" if self.is_old_version: - jver = ".".join(str(x) for x in self.version) + jver = ".".join([str(x) for x in self.version]) ver = f"[{jver}]" - jindex_axes = ",".join(a.name for a in self.index_axes) + jindex_axes = ",".join([a.name for a in self.index_axes]) return ( f"{self.pandas_type:12.12}{ver} " f"(typ->{self.table_type_short},nrows->{self.nrows}," @@ -3519,7 +3519,7 @@ def validate_version(self, where=None): """are we trying to operate on an old version?""" if where is not None: if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: - ws = incompatibility_doc % ".".join(str(x) for x in self.version) + ws = incompatibility_doc % ".".join([str(x) for x in self.version]) warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): @@ -4066,7 +4066,7 @@ def get_blk_items(mgr): 
new_blocks.append(b) new_blk_items.append(b_items) except (IndexError, KeyError) as err: - jitems = ",".join(pprint_thing(item) for item in items) + jitems = ",".join([pprint_thing(item) for item in items]) raise ValueError( f"cannot match existing table structure for [{jitems}] " "on appending data" diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b9d5b18b85e02..7f982de905c00 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1913,7 +1913,7 @@ def insert_statement(self, *, num_rows: int): col_names = ",".join(bracketed_names) row_wildcards = ",".join([wld] * len(names)) - wildcards = ",".join(f"({row_wildcards})" for _ in range(num_rows)) + wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)]) insert_statement = ( f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}" ) @@ -1952,7 +1952,7 @@ def _create_table_setup(self): keys = [self.keys] else: keys = self.keys - cnames_br = ", ".join(escape(c) for c in keys) + cnames_br = ", ".join([escape(c) for c in keys]) create_tbl_stmts.append( f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})" ) @@ -1972,7 +1972,7 @@ def _create_table_setup(self): ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index] if len(ix_cols): cnames = "_".join(ix_cols) - cnames_br = ",".join(escape(c) for c in ix_cols) + cnames_br = ",".join([escape(c) for c in ix_cols]) create_stmts.append( "CREATE INDEX " + escape("ix_" + self.name + "_" + cnames) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ffaebb3c10ae2..bb8122b12fdbc 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1357,12 +1357,12 @@ def _read_old_header(self, first_char: bytes) -> None: try: self.typlist = [self.TYPE_MAP[typ] for typ in typlist] except ValueError as err: - invalid_types = ",".join(str(x) for x in typlist) + invalid_types = ",".join([str(x) for x in typlist]) raise ValueError(f"cannot convert stata types [{invalid_types}]") from err try: self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] except ValueError as err: - invalid_dtypes = ",".join(str(x) for x in typlist) + invalid_dtypes = ",".join([str(x) for x in typlist]) raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") from err if self.format_version > 108: diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 302d5ede0ae86..b3ae13b588e19 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -866,7 +866,7 @@ def _get_call_args(backend_name, data, args, kwargs): if args and isinstance(data, ABCSeries): positional_args = str(args)[1:-1] keyword_args = ", ".join( - f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args) + [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] ) msg = ( "`Series.plot()` should not be called with positional " diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7ddab91a24ec0..690e39de2ddb2 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -714,7 +714,7 @@ def _get_index_name(self) -> str | None: if isinstance(self.data.index, ABCMultiIndex): name = self.data.index.names if com.any_not_none(*name): - name = ",".join(pprint_thing(x) for x in name) + name = ",".join([pprint_thing(x) for x in name]) else: name = None else: diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 103489e4abe98..49a1dc8bbb21c 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -126,7 +126,7 @@ def finalize(self, 
other, method=None, **kwargs): for name in self._metadata: if method == "concat": value = "+".join( - getattr(o, name) for o in other.objs if getattr(o, name, None) + [getattr(o, name) for o in other.objs if getattr(o, name, None)] ) object.__setattr__(self, name, value) else: diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 755081349170d..6e5cb3add43df 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -130,7 +130,7 @@ def finalize(self, other, method=None, **kwargs): for name in self._metadata: if method == "concat" and name == "filename": value = "+".join( - getattr(o, name) for o in other.objs if getattr(o, name, None) + [getattr(o, name) for o in other.objs if getattr(o, name, None)] ) object.__setattr__(self, name, value) else: diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index a41d02cfbd394..7decd1ffe7dff 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -513,11 +513,10 @@ def test_get_indexer_mismatched_dtype_with_method(self, non_comparable_idx, meth continue # Two different error message patterns depending on dtypes msg = "|".join( - re.escape(msg) - for msg in ( + [re.escape(msg) for msg in ( f"Cannot compare dtypes {pi.dtype} and {other.dtype}", " not supported between instances of ", - ) + )] ) with pytest.raises(TypeError, match=msg): pi.get_indexer(other2, method=method) diff --git a/pandas/tests/io/formats/style/test_align.py b/pandas/tests/io/formats/style/test_align.py index f81c1fbd6d85e..ce26084466a4d 100644 --- a/pandas/tests/io/formats/style/test_align.py +++ b/pandas/tests/io/formats/style/test_align.py @@ -13,7 +13,7 @@ def bar_grad(a=None, b=None, c=None, d=None): return ret + [ ( "background", - f"linear-gradient(90deg,{','.join(x for x in [a, b, c, d] if x)})", + f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})", ) ] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c6155cac101e6..500f8bf5ff159 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1369,7 +1369,7 @@ def test_to_string(self): ) lines = result.split("\n") header = lines[0].strip().split() - joined = "\n".join(re.sub(r"\s+", " ", x).strip() for x in lines[1:]) + joined = "\n".join([re.sub(r"\s+", " ", x).strip() for x in lines[1:]]) recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") tm.assert_series_equal(recons["B"], biggie["B"]) assert recons["A"].count() == biggie["A"].count() diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 160e00f5fb930..5df4470635af5 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -434,10 +434,10 @@ def test_internal_null_byte(c_parser_only): def test_read_nrows_large(c_parser_only): # gh-7626 - Read only nrows of data in for large inputs (>262144b) parser = c_parser_only - header_narrow = "\t".join("COL_HEADER_" + str(i) for i in range(10)) + "\n" - data_narrow = "\t".join("somedatasomedatasomedata1" for _ in range(10)) + "\n" - header_wide = "\t".join("COL_HEADER_" + str(i) for i in range(15)) + "\n" - data_wide = "\t".join("somedatasomedatasomedata2" for _ in range(15)) + "\n" + header_narrow = "\t".join(["COL_HEADER_" + str(i) for i in range(10)]) + "\n" + data_narrow = "\t".join(["somedatasomedatasomedata1" for 
_ in range(10)]) + "\n" + header_wide = "\t".join(["COL_HEADER_" + str(i) for i in range(15)]) + "\n" + data_wide = "\t".join(["somedatasomedatasomedata2" for _ in range(15)]) + "\n" test_input = header_narrow + data_narrow * 1050 + header_wide + data_wide * 2 df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010) @@ -565,7 +565,7 @@ def test_bytes_exceed_2gb(c_parser_only): if parser.low_memory: pytest.skip("not a high_memory test") - csv = StringIO("strings\n" + "\n".join("x" * (1 << 20) for _ in range(2100))) + csv = StringIO("strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])) df = parser.read_csv(csv) assert not df.empty diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index 981d1d438c3b0..123dce2048a44 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -44,7 +44,7 @@ def test_multi_thread_string_io_read_csv(all_parsers): num_files = 100 bytes_to_df = [ - "\n".join(f"{i:d},{i:d},{i:d}" for i in range(max_row_range)).encode() + "\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode() for _ in range(num_files) ] diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index fecba8bd81404..6e445f6813310 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -123,7 +123,7 @@ def f(i, v): return buf - data = StringIO("\n".join(f(i, v) for i, v in enumerate(_NA_VALUES))) + data = StringIO("\n".join([f(i, v) for i, v in enumerate(_NA_VALUES)])) expected = DataFrame(np.nan, columns=range(nv), index=range(nv)) result = parser.read_csv(data, header=None) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 62650b4ef42a3..0735f60fabbf6 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -49,10 +49,10 @@ def test_deep_skip_rows(all_parsers): # see gh-4382 parser = all_parsers data = "a,b,c\n" + "\n".join( - ",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10) + [",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10)] ) condensed_data = "a,b,c\n" + "\n".join( - ",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9] + [",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]] ) result = parser.read_csv(StringIO(data), skiprows=[6, 8]) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 290e063a59be7..97b3a82440ee6 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2336,7 +2336,7 @@ def psql_insert_copy(table, conn, keys, data_iter): writer.writerows(data_iter) s_buf.seek(0) - columns = ", ".join(f'"{k}"' for k in keys) + columns = ", ".join([f'"{k}"' for k in keys]) if table.schema: table_name = f"{table.schema}.{table.name}" else: diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 0cbe5d8ff43b9..c540c0e1c6721 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -245,7 +245,7 @@ def _format_argument_list(allow_args: list[str]): return f" except for the argument '{allow_args[0]}'" else: last = allow_args[-1] - args = ", ".join("'" + x + "'" for x in allow_args[:-1]) + args = ", ".join(["'" + x + "'" for x in allow_args[:-1]]) return f" except for the arguments {args} and '{last}'" @@ -385,10 +385,12 @@ def decorator(decorated: F) -> F: # formatting templates and concatenating docstring decorated.__doc__ = 
"".join( - component.format(**params) - if isinstance(component, str) - else dedent(component.__doc__ or "") - for component in docstring_components + [ + component.format(**params) + if isinstance(component, str) + else dedent(component.__doc__ or "") + for component in docstring_components + ] ) # error: "F" has no attribute "_docstring_components" diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 6c180f68395db..289900c47375c 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -35,7 +35,7 @@ def _get_sys_info() -> dict[str, JSONSerializable]: language_code, encoding = locale.getlocale() return { "commit": _get_commit_hash(), - "python": ".".join(str(i) for i in sys.version_info), + "python": ".".join([str(i) for i in sys.version_info]), "python-bits": struct.calcsize("P") * 8, "OS": uname_result.system, "OS-release": uname_result.release, diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 3d59cef4d4f77..cd6a38f9e7ff1 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -373,11 +373,11 @@ def __str__(self) -> str: parts.append(f"{self.epoch}!") # Release segment - parts.append(".".join(str(x) for x in self.release)) + parts.append(".".join([str(x) for x in self.release])) # Pre-release if self.pre is not None: - parts.append("".join(str(x) for x in self.pre)) + parts.append("".join([str(x) for x in self.pre])) # Post-release if self.post is not None: @@ -419,7 +419,7 @@ def dev(self) -> int | None: @property def local(self) -> str | None: if self._version.local: - return ".".join(str(x) for x in self._version.local) + return ".".join([str(x) for x in self._version.local]) else: return None @@ -436,7 +436,7 @@ def base_version(self) -> str: parts.append(f"{self.epoch}!") # Release segment - parts.append(".".join(str(x) for x in self.release)) + parts.append(".".join([str(x) for x in self.release])) return "".join(parts) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 46cfae8e31208..6ebf9cedeb8e3 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -190,7 +190,7 @@ def test_bad_docstrings(self, capsys, klass, func, msgs): self._import_path(klass=klass, func=func) ) for msg in msgs: - assert msg in " ".join(err[1] for err in result["errors"]) + assert msg in " ".join([err[1] for err in result["errors"]]) def test_validate_all_ignore_deprecated(self, monkeypatch): monkeypatch.setattr( From cec877e6664c2620d0962f97edb94fc4843fa72c Mon Sep 17 00:00:00 2001 From: Vitor Serpa <83547959+vrserpa@users.noreply.github.com> Date: Sun, 11 Jul 2021 11:59:35 +0000 Subject: [PATCH 2/2] Fix pre-commit failures --- asv_bench/benchmarks/io/csv.py | 7 ++----- pandas/tests/indexes/period/test_indexing.py | 11 +++++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index c10dd8c7762ca..852c1e0d139e5 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -291,11 +291,8 @@ class ReadCSVFloatPrecision(StringIORewind): def setup(self, sep, decimal, float_precision): floats = [ - "".join( - [ - random.choice(string.digits) for _ in range(28) - ] - ) for _ in range(15) + "".join([random.choice(string.digits) for _ in range(28)]) + for _ in range(15) ] rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n" data = rows * 5 diff --git 
a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 1db8b545c20a2..3b7b738bec410 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -515,10 +515,13 @@ def test_get_indexer_mismatched_dtype_with_method(self, non_comparable_idx, meth continue # Two different error message patterns depending on dtypes msg = "|".join( - [re.escape(msg) for msg in ( - f"Cannot compare dtypes {pi.dtype} and {other.dtype}", - " not supported between instances of ", - )] + [ + re.escape(msg) + for msg in ( + f"Cannot compare dtypes {pi.dtype} and {other.dtype}", + " not supported between instances of ", + ) + ] ) with pytest.raises(TypeError, match=msg): pi.get_indexer(other2, method=method)
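
Note (not part of the patch itself): the change above swaps generator expressions for list comprehensions inside str.join calls. In CPython, str.join first materializes its argument into a sequence before concatenating, so a generator expression saves no memory here and only adds iterator overhead, while a list comprehension builds that sequence directly. Below is a minimal micro-benchmark sketch illustrating the effect; the names, sizes, and repeat counts are illustrative and not taken from the pandas benchmark suite.

    import timeit

    # Build a list of 10,000 short strings once; both statements join them.
    setup = "words = [str(i) for i in range(10_000)]"

    # Generator expression: str.join must still collect the items into a sequence.
    t_gen = timeit.timeit('",".join(str(w) for w in words)', setup=setup, number=1_000)

    # List comprehension: the sequence is built directly, skipping generator overhead.
    t_list = timeit.timeit('",".join([str(w) for w in words])', setup=setup, number=1_000)

    print(f"generator expression: {t_gen:.3f}s")
    print(f"list comprehension:   {t_list:.3f}s")

On typical CPython builds the list-comprehension variant runs measurably faster, which is the motivation behind #41753.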