diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b619ea93b981d..a87d27d8b35d8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -241,7 +241,7 @@ def read_sql_table( try: meta.reflect(only=[table_name], views=True) except sqlalchemy.exc.InvalidRequestError: - raise ValueError("Table {name} not found".format(name=table_name)) + raise ValueError(f"Table {table_name} not found") pandas_sql = SQLDatabase(con, meta=meta) table = pandas_sql.read_table( @@ -256,7 +256,7 @@ def read_sql_table( if table is not None: return table else: - raise ValueError("Table {name} not found".format(name=table_name), con) + raise ValueError(f"Table {table_name} not found", con) def read_sql_query( @@ -498,7 +498,7 @@ def to_sql( .. versionadded:: 0.24.0 """ if if_exists not in ("fail", "replace", "append"): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) + raise ValueError(f"'{if_exists}' is not valid for if_exists") pandas_sql = pandasSQL_builder(con, schema=schema) @@ -625,7 +625,7 @@ def __init__( self.table = self.pd_sql.get_table(self.name, self.schema) if self.table is None: - raise ValueError("Could not init table '{name}'".format(name=name)) + raise ValueError(f"Could not init table '{name}'") def exists(self): return self.pd_sql.has_table(self.name, self.schema) @@ -643,18 +643,14 @@ def _execute_create(self): def create(self): if self.exists(): if self.if_exists == "fail": - raise ValueError( - "Table '{name}' already exists.".format(name=self.name) - ) + raise ValueError(f"Table '{self.name}' already exists.") elif self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() elif self.if_exists == "append": pass else: - raise ValueError( - "'{0}' is not valid for if_exists".format(self.if_exists) - ) + raise ValueError(f"'{self.if_exists}' is not valid for if_exists") else: self._execute_create() @@ -689,7 +685,7 @@ def insert_data(self): try: temp.reset_index(inplace=True) except ValueError as err: - raise ValueError("duplicate name in index/columns: {0}".format(err)) + raise ValueError(f"duplicate name in index/columns: {err}") else: temp = self.frame @@ -732,7 +728,7 @@ def insert(self, chunksize=None, method=None): elif callable(method): exec_insert = partial(method, self) else: - raise ValueError("Invalid parameter `method`: {}".format(method)) + raise ValueError(f"Invalid parameter `method`: {method}") keys, data_list = self.insert_data() @@ -826,7 +822,7 @@ def _index_name(self, index, index_label): if len(index_label) != nlevels: raise ValueError( "Length of 'index_label' should match number of " - "levels, which is {0}".format(nlevels) + f"levels, which is {nlevels}" ) else: return index_label @@ -839,7 +835,7 @@ def _index_name(self, index, index_label): return ["index"] else: return [ - l if l is not None else "level_{0}".format(i) + l if l is not None else f"level_{i}" for i, l in enumerate(self.frame.index.names) ] @@ -1304,10 +1300,7 @@ def to_sql( for col, my_type in dtype.items(): if not isinstance(to_instance(my_type), TypeEngine): - raise ValueError( - "The type of {column} is not a " - "SQLAlchemy type ".format(column=col) - ) + raise ValueError(f"The type of {col} is not a " "SQLAlchemy type ") table = SQLTable( name, @@ -1331,11 +1324,11 @@ def to_sql( ) if name not in table_names: msg = ( - "The provided table name '{0}' is not found exactly as " + f"The provided table name '{name}' is not found exactly as " "such in the database after writing the table, possibly " "due to case sensitivity issues. Consider using lower " "case table names." - ).format(name) + ) warnings.warn(msg, UserWarning) @property @@ -1395,9 +1388,7 @@ def _get_unicode_name(name): try: uname = str(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError: - raise ValueError( - "Cannot convert identifier to UTF-8: '{name}'".format(name=name) - ) + raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") return uname @@ -1461,8 +1452,8 @@ def insert_statement(self): bracketed_names = [escape(column) for column in names] col_names = ",".join(bracketed_names) wildcards = ",".join([wld] * len(names)) - insert_statement = "INSERT INTO {table} ({columns}) VALUES ({wld})".format( - table=escape(self.name), columns=col_names, wld=wildcards + insert_statement = ( + f"INSERT INTO {escape(self.name)} ({col_names}) VALUES ({wildcards})" ) return insert_statement @@ -1496,9 +1487,7 @@ def _create_table_setup(self): keys = self.keys cnames_br = ", ".join(escape(c) for c in keys) create_tbl_stmts.append( - "CONSTRAINT {tbl}_pk PRIMARY KEY ({cnames_br})".format( - tbl=self.name, cnames_br=cnames_br - ) + f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})" ) create_stmts = [ @@ -1599,14 +1588,11 @@ def execute(self, *args, **kwargs): self.con.rollback() except Exception as inner_exc: # pragma: no cover ex = DatabaseError( - "Execution failed on sql: {sql}\n{exc}\nunable " - "to rollback".format(sql=args[0], exc=exc) + f"Execution failed on sql: {args[0]}\n{exc}\nunable " "to rollback" ) raise ex from inner_exc - ex = DatabaseError( - "Execution failed on sql '{sql}': {exc}".format(sql=args[0], exc=exc) - ) + ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}") raise ex from exc @staticmethod @@ -1731,11 +1717,7 @@ def to_sql( if dtype is not None: for col, my_type in dtype.items(): if not isinstance(my_type, str): - raise ValueError( - "{column} ({type!s}) not a string".format( - column=col, type=my_type - ) - ) + raise ValueError(f"{col} ({my_type}) not a string") table = SQLiteTable( name, @@ -1755,9 +1737,7 @@ def has_table(self, name, schema=None): # esc_name = escape(name) wld = "?" - query = ( - "SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" - ).format(wld=wld) + query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" return len(self.execute(query, [name]).fetchall()) > 0 @@ -1765,7 +1745,7 @@ def get_table(self, table_name, schema=None): return None # not supported in fallback mode def drop_table(self, name, schema=None): - drop_sql = "DROP TABLE {name}".format(name=_get_valid_sqlite_name(name)) + drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" self.execute(drop_sql) def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index dbe64e4c0f06d..c80a14fa0795f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -370,7 +370,7 @@ def convert_delta_safe(base, deltas, unit): month = np.ones_like(dates) conv_dates = convert_year_month_safe(year, month) else: - raise ValueError("Date fmt {fmt} not understood".format(fmt=fmt)) + raise ValueError(f"Date fmt {fmt} not understood") if has_bad_values: # Restore NaT for bad values conv_dates[bad_locs] = NaT @@ -465,9 +465,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): d = parse_dates_safe(dates, year=True) conv_dates = d.year else: - raise ValueError( - "Format {fmt} is not a known Stata date format".format(fmt=fmt) - ) + raise ValueError(f"Format {fmt} is not a known Stata date format") conv_dates = Series(conv_dates, dtype=np.float64) missing_value = struct.unpack(" float32_max: data[col] = data[col].astype(np.float64) elif dtype == np.float64: if value > float64_max: raise ValueError( - "Column {col} has a maximum value " - "({val}) outside the range supported by " - "Stata ({float64_max})".format( - col=col, val=value, float64_max=float64_max - ) + f"Column {col} has a maximum value " + f"({value}) outside the range supported by " + f"Stata ({float64_max})" ) if ws: @@ -652,9 +629,15 @@ def __init__(self, catarray): category = vl[1] if not isinstance(category, str): category = str(category) + value_label_mismatch_msg = ( + "\nStata value labels (pandas categories) must be strings. " + f"Column {catarray.name} contains non-string labels which " + "will be converted to strings. Please check that the " + "Stata data file created has not lost information due to " + "duplicate labels." + ) warnings.warn( - value_label_mismatch_doc.format(catarray.name), - ValueLabelTypeMismatch, + value_label_mismatch_msg, ValueLabelTypeMismatch, ) self.off.append(self.text_len) @@ -1192,7 +1175,7 @@ def f(typ): try: return self.TYPE_MAP_XML[typ] except KeyError: - raise ValueError("cannot convert stata types [{0}]".format(typ)) + raise ValueError(f"cannot convert stata types [{typ}]") typlist = [f(x) for x in raw_typlist] @@ -1202,7 +1185,7 @@ def f(typ): try: return self.DTYPE_MAP_XML[typ] except KeyError: - raise ValueError("cannot convert stata dtype [{0}]".format(typ)) + raise ValueError(f"cannot convert stata dtype [{typ}]") dtyplist = [f(x) for x in raw_typlist] @@ -1330,19 +1313,13 @@ def _read_old_header(self, first_char): try: self.typlist = [self.TYPE_MAP[typ] for typ in typlist] except ValueError: - raise ValueError( - "cannot convert stata types [{0}]".format( - ",".join(str(x) for x in typlist) - ) - ) + errjoin = ", ".join(str(x) for x in typlist) + raise ValueError("cannot convert stata types " f"[{errjoin}]") try: self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] except ValueError: - raise ValueError( - "cannot convert stata dtypes [{0}]".format( - ",".join(str(x) for x in typlist) - ) - ) + errjoin = ", ".join(str(x) for x in typlist) + raise ValueError("cannot convert stata dtypes " f"[{errjoin}]") if self.format_version > 108: self.varlist = [ @@ -1415,12 +1392,14 @@ def _decode(self, s): except UnicodeDecodeError: # GH 25960, fallback to handle incorrect format produced when 117 # files are converted to 118 files in Stata - msg = """ -One or more strings in the dta file could not be decoded using {encoding}, and -so the fallback encoding of latin-1 is being used. This can happen when a file -has been incorrectly encoded by Stata or some other software. You should verify -the string values returned are correct.""" - warnings.warn(msg.format(encoding=self._encoding), UnicodeWarning) + msg = ( + "\nOne or more strings in the dta file could not be decoded " + f"using {self._encoding}, and\nso the fallback encoding of " + "latin-1 is being used. This can happen when a file\nhas been " + "incorrectly encoded by Stata or some other software. You " + "should verify\nthe string values returned are correct." + ) + warnings.warn(msg, UnicodeWarning) return s.decode("latin-1") def _read_value_labels(self): @@ -1794,18 +1773,16 @@ def _do_convert_categoricals( repeats = list(vc.index[vc > 1]) repeats = "-" * 80 + "\n" + "\n".join(repeats) # GH 25772 - msg = """ -Value labels for column {col} are not unique. These cannot be converted to -pandas categoricals. - -Either read the file with `convert_categoricals` set to False or use the -low level interface in `StataReader` to separately read the values and the -value_labels. - -The repeated labels are: -{repeats} -""" - raise ValueError(msg.format(col=col, repeats=repeats)) + msg = ( + f"\nValue labels for column {col} are not unique. These " + "cannot be converted to\npandas categoricals.\n\n" + "Either read the file with `convert_categoricals` set " + "to False or use the\nlow level interface in " + "`StataReader` to separately read the values and the" + "\nvalue_labels.\n\nThe repeated labels are:\n" + f"{repeats}\n" + ) + raise ValueError(msg) # TODO: is the next line needed above in the data(...) method? cat_data = Series(cat_data, index=data.index) cat_converted_data.append((col, cat_data)) @@ -1874,7 +1851,7 @@ def _set_endianness(endianness): elif endianness.lower() in [">", "big"]: return ">" else: # pragma : no cover - raise ValueError("Endianness {endian} not understood".format(endian=endianness)) + raise ValueError(f"Endianness {endianness} not understood") def _pad_bytes(name, length): @@ -1906,7 +1883,7 @@ def _convert_datetime_to_stata_type(fmt): ]: return np.float64 # Stata expects doubles for SIFs else: - raise NotImplementedError("Format {fmt} not implemented".format(fmt=fmt)) + raise NotImplementedError(f"Format {fmt} not implemented") def _maybe_convert_to_int_keys(convert_dates, varlist): @@ -1956,9 +1933,7 @@ def _dtype_to_stata_type(dtype, column): elif dtype == np.int8: return 251 else: # pragma : no cover - raise NotImplementedError( - "Data type {dtype} not supported.".format(dtype=dtype) - ) + raise NotImplementedError(f"Data type {dtype} not supported.") def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, force_strl=False): @@ -1988,14 +1963,14 @@ def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, force_strl=False inferred_dtype = infer_dtype(column, skipna=True) if not (inferred_dtype in ("string", "unicode") or len(column) == 0): raise ValueError( - "Column `{col}` cannot be exported.\n\nOnly " + f"Column `{column.name}` cannot be exported.\n\nOnly " "string-like object arrays containing all " "strings or a mix of strings and None can be " "exported. Object arrays containing only null " "values are prohibited. Other object types" "cannot be exported and must first be converted " "to one of the supported " - "types.".format(col=column.name) + "types." ) itemsize = max_len_string_array(ensure_object(column.values)) if itemsize > max_str_len: @@ -2013,9 +1988,7 @@ def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, force_strl=False elif dtype == np.int8 or dtype == np.int16: return "%8.0g" else: # pragma : no cover - raise NotImplementedError( - "Data type {dtype} not supported.".format(dtype=dtype) - ) + raise NotImplementedError(f"Data type {dtype} not supported.") class StataWriter(StataParser): @@ -2251,11 +2224,20 @@ def _check_column_names(self, data): orig_name = orig_name.encode("utf-8") except (UnicodeDecodeError, AttributeError): pass - msg = "{0} -> {1}".format(orig_name, name) + msg = f"{orig_name} -> {name}" conversion_warning.append(msg) - ws = invalid_name_doc.format("\n ".join(conversion_warning)) - warnings.warn(ws, InvalidColumnName) + invalid_name_warning = "\n ".join(conversion_warning) + invalid_name_msg = ( + "Not all pandas column names were valid Stata variable names." + "The following replacements have been made:\n\n" + f"{invalid_name_warning}\n\n" + "If this is not what you expect, please make sure you have " + "Stata-compliant column names in your DataFrame (strings only, " + "max 32 characters, only alphanumerics and underscores, no " + "Stata reserved words)" + ) + warnings.warn(invalid_name_msg, InvalidColumnName) self._converted_names = converted_names self._update_strl_names() @@ -2344,9 +2326,8 @@ def write_file(self): os.unlink(self._fname) except OSError: warnings.warn( - "This save was not successful but {0} could not " - "be deleted. This file is not " - "valid.".format(self._fname), + f"This save was not successful but {self._fname} could " + "not be deleted. This file is not valid.", ResourceWarning, ) raise exc @@ -2527,7 +2508,7 @@ def _prepare_data(self): typ = typlist[i] if typ <= self._max_string_length: data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,)) - stype = "S{type}".format(type=typ) + stype = f"S{typ}" dtypes[col] = stype data[col] = data[col].str.encode(self._encoding).astype(stype) else: