diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index af4e83f506257..16654f0227182 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -38,9 +38,60 @@ class ParserError(ValueError): class DtypeWarning(Warning): """ - Warning that is raised for a dtype incompatibility. This - can happen whenever `pd.read_csv` encounters non- - uniform dtypes in a column(s) of a given CSV file. + Warning raised when reading different dtypes in a column from a file. + + Raised for a dtype incompatibility. This can happen whenever `read_csv` + or `read_table` encounter non-uniform dtypes in a column(s) of a given + CSV file. + + See Also + -------- + pandas.read_csv : Read CSV (comma-separated) file into a DataFrame. + pandas.read_table : Read general delimited file into a DataFrame. + + Notes + ----- + This warning is issued when dealing with larger files because the dtype + checking happens per chunk read. + + Despite the warning, the CSV file is read with mixed types in a single + column which will be an object type. See the examples below to better + understand this issue. + + Examples + -------- + This example creates and reads a large CSV file with a column that contains + `int` and `str`. + + >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + + ... ['1'] * 100000), + ... 'b': ['b'] * 300000}) + >>> df.to_csv('test.csv', index=False) + >>> df2 = pd.read_csv('test.csv') + + DtypeWarning: Columns (0) have mixed types + + Note that ``df2`` will contain both `str` and `int` for the + same input, '1'. + + >>> df2.iloc[262140, 0] + '1' + >>> type(df2.iloc[262140, 0]) + <class 'str'> + >>> df2.iloc[262150, 0] + 1 + >>> type(df2.iloc[262150, 0]) + <class 'int'> + + One way to solve this issue is using the `dtype` parameter in the + `read_csv` and `read_table` functions to make the conversion explicit: + + >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) + + No warning was issued. + + >>> import os + >>> os.remove('test.csv') """