diff --git a/ci/code_checks.sh b/ci/code_checks.sh index e2dc543360a62..8cf9f164d140a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -267,11 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then -k"-nonzero -reindex -searchsorted -to_dict" RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests generic.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests groupby.py' ; echo $MSG pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe" RET=$(($RET + $?)) ; echo $MSG "DONE" @@ -311,6 +306,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/arrays/boolean.py RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests base.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/base.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests construction.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/construction.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests generic.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/generic.py + RET=$(($RET + $?)) ; echo $MSG "DONE" fi ### DOCSTRINGS ### diff --git a/pandas/core/base.py b/pandas/core/base.py index b9aeb32eea5c1..f2b678500a985 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1480,41 +1480,49 @@ def factorize(self, sort=False, na_sentinel=-1): Examples -------- - >>> x = pd.Series([1, 2, 3]) - >>> x + >>> ser = pd.Series([1, 2, 3]) + >>> ser 0 1 1 2 2 3 dtype: int64 - >>> x.searchsorted(4) + >>> ser.searchsorted(4) 3 - >>> x.searchsorted([0, 4]) + >>> ser.searchsorted([0, 4]) array([0, 3]) - >>> x.searchsorted([1, 3], side='left') + >>> ser.searchsorted([1, 3], side='left') array([0, 2]) - >>> x.searchsorted([1, 3], side='right') + >>> ser.searchsorted([1, 3], 
side='right') array([1, 3]) - >>> x = pd.Categorical(['apple', 'bread', 'bread', - 'cheese', 'milk'], ordered=True) + >>> ser = pd.Categorical( + ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True + ... ) + >>> ser [apple, bread, bread, cheese, milk] Categories (4, object): [apple < bread < cheese < milk] - >>> x.searchsorted('bread') + >>> ser.searchsorted('bread') 1 - >>> x.searchsorted(['bread'], side='right') + >>> ser.searchsorted(['bread'], side='right') array([3]) If the values are not monotonically sorted, wrong locations may be returned: - >>> x = pd.Series([2, 1, 3]) - >>> x.searchsorted(1) + >>> ser = pd.Series([2, 1, 3]) + >>> ser + 0 2 + 1 1 + 2 3 + dtype: int64 + + >>> ser.searchsorted(1) # doctest: +SKIP 0 # wrong result, correct would be 1 """ diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f947a1fda49f1..e2d8fba8d4148 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -4,6 +4,7 @@ These should not depend on core.internals. """ + from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast import numpy as np @@ -200,12 +201,12 @@ def array( >>> pd.array([1, 2, np.nan]) <IntegerArray> - [1, 2, NaN] + [1, 2, <NA>] Length: 3, dtype: Int64 >>> pd.array(["a", None, "c"]) <StringArray> - ['a', nan, 'c'] + ['a', <NA>, 'c'] Length: 3, dtype: string >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff7c481d550d4..b03ba2d325db5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1210,7 +1210,7 @@ def _set_axis_name(self, name, axis=0, inplace=False): >>> df.index = pd.MultiIndex.from_product( ... [["mammal"], ['dog', 'cat', 'monkey']]) >>> df._set_axis_name(["type", "name"]) - legs + num_legs type name mammal dog 4 cat 4 @@ -2181,45 +2181,141 @@ def to_json( Examples -------- - >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], - ... index=['row 1', 'row 2'], - ... 
columns=['col 1', 'col 2']) - >>> df.to_json(orient='split') - '{"columns":["col 1","col 2"], - "index":["row 1","row 2"], - "data":[["a","b"],["c","d"]]}' + >>> import json + >>> df = pd.DataFrame( + ... [["a", "b"], ["c", "d"]], + ... index=["row 1", "row 2"], + ... columns=["col 1", "col 2"], + ... ) + + >>> result = df.to_json(orient="split") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "columns": [ + "col 1", + "col 2" + ], + "index": [ + "row 1", + "row 2" + ], + "data": [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] + } Encoding/decoding a Dataframe using ``'records'`` formatted JSON. Note that index labels are not preserved with this encoding. - >>> df.to_json(orient='records') - '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + >>> result = df.to_json(orient="records") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + { + "col 1": "a", + "col 2": "b" + }, + { + "col 1": "c", + "col 2": "d" + } + ] Encoding/decoding a Dataframe using ``'index'`` formatted JSON: - >>> df.to_json(orient='index') - '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + >>> result = df.to_json(orient="index") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "row 1": { + "col 1": "a", + "col 2": "b" + }, + "row 2": { + "col 1": "c", + "col 2": "d" + } + } Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: - >>> df.to_json(orient='columns') - '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}' + >>> result = df.to_json(orient="columns") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "col 1": { + "row 1": "a", + "row 2": "c" + }, + "col 2": { + "row 1": "b", + "row 2": "d" + } + } Encoding/decoding a Dataframe using ``'values'`` formatted JSON: - >>> df.to_json(orient='values') - '[["a","b"],["c","d"]]' - - Encoding with Table Schema + >>> 
result = df.to_json(orient="values") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + [ + [ + "a", + "b" + ], + [ + "c", + "d" + ] + ] - >>> df.to_json(orient='table') - '{"schema": {"fields": [{"name": "index", "type": "string"}, - {"name": "col 1", "type": "string"}, - {"name": "col 2", "type": "string"}], - "primaryKey": "index", - "pandas_version": "0.20.0"}, - "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, - {"index": "row 2", "col 1": "c", "col 2": "d"}]}' + Encoding with Table Schema: + + >>> result = df.to_json(orient="table") + >>> parsed = json.loads(result) + >>> json.dumps(parsed, indent=4) # doctest: +SKIP + { + "schema": { + "fields": [ + { + "name": "index", + "type": "string" + }, + { + "name": "col 1", + "type": "string" + }, + { + "name": "col 2", + "type": "string" + } + ], + "primaryKey": [ + "index" + ], + "pandas_version": "0.20.0" + }, + "data": [ + { + "index": "row 1", + "col 1": "a", + "col 2": "b" + }, + { + "index": "row 2", + "col 1": "c", + "col 2": "d" + } + ] + } """ from pandas.io import json @@ -2646,7 +2742,8 @@ def to_clipboard( Copy the contents of a DataFrame to the clipboard. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) - >>> df.to_clipboard(sep=',') + + >>> df.to_clipboard(sep=',') # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # ,A,B,C ... # 0,1,2,3 @@ -2655,7 +2752,7 @@ def to_clipboard( We can omit the index by passing the keyword `index` and setting it to false. - >>> df.to_clipboard(sep=',', index=False) + >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # A,B,C ... # 1,2,3 @@ -4887,18 +4984,17 @@ def sample( Notes ----- - Use ``.pipe`` when chaining together functions that expect Series, DataFrames or GroupBy objects. 
Instead of writing - >>> f(g(h(df), arg1=a), arg2=b, arg3=c) + >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP You can write >>> (df.pipe(h) ... .pipe(g, arg1=a) - ... .pipe(f, arg2=b, arg3=c) - ... ) + ... .pipe(func, arg2=b, arg3=c) + ... ) # doctest: +SKIP If you have a function that takes the data as (say) the second argument, pass a tuple indicating which keyword expects the @@ -4906,8 +5002,8 @@ def sample( >>> (df.pipe(h) ... .pipe(g, arg1=a) - ... .pipe((f, 'arg2'), arg1=a, arg3=c) - ... ) + ... .pipe((func, 'arg2'), arg1=a, arg3=c) + ... ) # doctest: +SKIP """ @Appender(_shared_docs["pipe"] % _shared_doc_kwargs) @@ -5256,7 +5352,7 @@ def values(self) -> np.ndarray: dtype: object >>> df.values array([[ 3, 94, 31], - [ 29, 170, 115]], dtype=int64) + [ 29, 170, 115]]) A DataFrame with mixed type columns(e.g., str/object, int64, float32) results in an ndarray of the broadest type that accommodates these @@ -9511,12 +9607,13 @@ def describe( ... np.datetime64("2010-01-01") ... ]) >>> s.describe() - count 3 - unique 2 - top 2010-01-01 00:00:00 - freq 2 - first 2000-01-01 00:00:00 - last 2010-01-01 00:00:00 + count 3 + mean 2006-09-01 08:00:00 + min 2000-01-01 00:00:00 + 25% 2004-12-31 12:00:00 + 50% 2010-01-01 00:00:00 + 75% 2010-01-01 00:00:00 + max 2010-01-01 00:00:00 dtype: object Describing a ``DataFrame``. By default only numeric fields @@ -9539,11 +9636,11 @@ def describe( Describing all columns of a ``DataFrame`` regardless of data type. - >>> df.describe(include='all') - categorical numeric object + >>> df.describe(include='all') # doctest: +SKIP + categorical numeric object count 3 3.0 3 unique 3 NaN 3 - top f NaN c + top f NaN a freq 1 NaN 1 mean NaN 2.0 NaN std NaN 1.0 NaN @@ -9582,11 +9679,11 @@ def describe( Including only string columns in a ``DataFrame`` description. 
- >>> df.describe(include=[np.object]) + >>> df.describe(include=[np.object]) # doctest: +SKIP object count 3 unique 3 - top c + top a freq 1 Including only categorical columns from a ``DataFrame`` description. @@ -9600,16 +9697,16 @@ def describe( Excluding numeric columns from a ``DataFrame`` description. - >>> df.describe(exclude=[np.number]) + >>> df.describe(exclude=[np.number]) # doctest: +SKIP categorical object count 3 3 unique 3 3 - top f c + top f a freq 1 1 Excluding object columns from a ``DataFrame`` description. - >>> df.describe(exclude=[np.object]) + >>> df.describe(exclude=[np.object]) # doctest: +SKIP categorical numeric count 3 3.0 unique 3 NaN