diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 2df940817498c..f1959cd70ed3a 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -82,7 +82,8 @@ class OptionError(AttributeError, KeyError): - """Exception for pandas.options, backwards compatible with KeyError + """ + Exception for pandas.options, backwards compatible with KeyError checks """ @@ -545,7 +546,8 @@ def deprecate_option( def _select_options(pat: str) -> List[str]: - """returns a list of keys matching `pat` + """ + returns a list of keys matching `pat` if pat=="all", returns all registered options """ @@ -708,7 +710,8 @@ def pp(name: str, ks: Iterable[str]) -> List[str]: @contextmanager def config_prefix(prefix): - """contextmanager for multiple invocations of API with a common prefix + """ + contextmanager for multiple invocations of API with a common prefix supported API functions: (register / get / set )__option diff --git a/pandas/_testing.py b/pandas/_testing.py index 01d2bfe0458c8..b6ce06554cd59 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -743,7 +743,8 @@ def repr_class(x): def assert_attr_equal(attr, left, right, obj="Attributes"): - """checks attributes are equal. Both objects must have attribute. + """ + checks attributes are equal. Both objects must have attribute. Parameters ---------- @@ -820,7 +821,8 @@ def assert_is_sorted(seq): def assert_categorical_equal( left, right, check_dtype=True, check_category_order=True, obj="Categorical" ): - """Test that Categoricals are equivalent. + """ + Test that Categoricals are equivalent. Parameters ---------- @@ -860,7 +862,8 @@ def assert_categorical_equal( def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): - """Test that two IntervalArrays are equivalent. + """ + Test that two IntervalArrays are equivalent. Parameters ---------- @@ -1009,7 +1012,8 @@ def _raise(left, right, err_msg): def assert_extension_array_equal( left, right, check_dtype=True, check_less_precise=False, check_exact=False ): - """Check that left and right ExtensionArrays are equal. + """ + Check that left and right ExtensionArrays are equal. Parameters ---------- @@ -1489,7 +1493,8 @@ def assert_sp_array_equal( check_fill_value=True, consolidate_block_indices=False, ): - """Check that the left and right SparseArray are equal. + """ + Check that the left and right SparseArray are equal. Parameters ---------- @@ -1724,7 +1729,8 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None) def all_index_generator(k=10): - """Generator which can be iterated over to get instances of all the various + """ + Generator which can be iterated over to get instances of all the various index classes. Parameters @@ -1763,7 +1769,8 @@ def index_subclass_makers_generator(): def all_timeseries_index_generator(k=10): - """Generator which can be iterated over to get instances of all the classes + """ + Generator which can be iterated over to get instances of all the classes which represent time-series. Parameters @@ -1854,7 +1861,8 @@ def makePeriodFrame(nper=None): def makeCustomIndex( nentries, nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None ): - """Create an index/multindex with given dimensions, levels, names, etc' + """ + Create an index/multindex with given dimensions, levels, names, etc' nentries - number of entries in index nlevels - number of levels (> 1 produces multindex) @@ -2144,7 +2152,8 @@ def makeMissingDataframe(density=0.9, random_state=None): def optional_args(decorator): - """allows a decorator to take optional positional and keyword arguments. + """ + allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that it is decorating a function, i.e. something like this:: @@ -2216,7 +2225,8 @@ def _get_default_network_errors(): def can_connect(url, error_classes=None): - """Try to connect to the given url. True if succeeds, False if IOError + """ + Try to connect to the given url. True if succeeds, False if IOError raised Parameters @@ -2584,7 +2594,8 @@ def use_numexpr(use, min_elements=None): def test_parallel(num_threads=2, kwargs_list=None): - """Decorator to run the same function multiple times in parallel. + """ + Decorator to run the same function multiple times in parallel. Parameters ---------- diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py index 588bd24ddf797..a84dbb4a661e4 100644 --- a/pandas/compat/chainmap.py +++ b/pandas/compat/chainmap.py @@ -5,7 +5,8 @@ class DeepChainMap(ChainMap[_KT, _VT]): - """Variant of ChainMap that allows direct updates to inner scopes. + """ + Variant of ChainMap that allows direct updates to inner scopes. Only works when all passed mapping are mutable. """ diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 79b87f146b9a7..448f84d58d7a0 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -98,7 +98,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i def _make_unique_kwarg_list( seq: Sequence[Tuple[Any, Any]] ) -> Sequence[Tuple[Any, Any]]: - """Uniquify aggfunc name of the pairs in the order list + """ + Uniquify aggfunc name of the pairs in the order list Examples: -------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c3c91cea43f6b..b5da6d4c11616 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,5 @@ -"""An interface for extending pandas with custom arrays. +""" +An interface for extending pandas with custom arrays. .. warning:: @@ -213,7 +214,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): - """Construct a new ExtensionArray from a sequence of strings. + """ + Construct a new ExtensionArray from a sequence of strings. .. versionadded:: 0.24.0 @@ -961,7 +963,8 @@ def __repr__(self) -> str: return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: - """Formatting function for scalar values. + """ + Formatting function for scalar values. This is used in the default '__repr__'. The returned formatting function receives instances of your scalar type. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6c7c35e9b4763..19602010fd882 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1891,7 +1891,8 @@ def __contains__(self, key) -> bool: return contains(self, key, container=self._codes) def _tidy_repr(self, max_vals=10, footer=True) -> str: - """ a short repr displaying only max_vals and an optional (but default + """ + a short repr displaying only max_vals and an optional (but default footer) """ num = max_vals // 2 diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 07aa8d49338c8..e39d1dc03adf5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -134,7 +134,8 @@ def _simple_new(cls, values, **kwargs): @property def _scalar_type(self) -> Type[DatetimeLikeScalar]: - """The scalar associated with this datelike + """ + The scalar associated with this datelike * PeriodArray : Period * DatetimeArray : Timestamp diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 642ae6d4deacb..f1e0882def13b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -477,7 +477,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: @property def _ndarray_values(self) -> np.ndarray: - """Internal pandas method for lossy conversion to a NumPy ndarray. + """ + Internal pandas method for lossy conversion to a NumPy ndarray. This method is not part of the pandas interface. @@ -492,7 +493,8 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, float]: return self.to_numpy(na_value=np.nan), np.nan def _values_for_argsort(self) -> np.ndarray: - """Return values for sorting. + """ + Return values for sorting. Returns ------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index c8bb0878b564d..ab3ee5bbcdc3a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -460,7 +460,8 @@ def from_tuples(cls, data, closed="right", copy=False, dtype=None): return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) def _validate(self): - """Verify that the IntervalArray is valid. + """ + Verify that the IntervalArray is valid. Checks that diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index eff9c03386a38..e77256a5aaadd 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -17,7 +17,8 @@ def _check_is_partition(parts, whole): def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): - """ For arbitrary (MultiIndexed) sparse Series return + """ + For arbitrary (MultiIndexed) sparse Series return (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo constructor. """ @@ -44,7 +45,8 @@ def get_indexers(levels): # labels_to_i[:] = np.arange(labels_to_i.shape[0]) def _get_label_to_i_dict(labels, sort_labels=False): - """ Return dict of unique labels to number. + """ + Return dict of unique labels to number. Optionally sort by label. """ labels = Index(map(tuple, labels)).unique().tolist() # squish diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 5563d3ae27118..7ed089b283903 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -1,4 +1,5 @@ -"""Operator classes for eval. +""" +Operator classes for eval. """ from datetime import datetime @@ -248,7 +249,8 @@ def is_datetime(self) -> bool: def _in(x, y): - """Compute the vectorized membership of ``x in y`` if possible, otherwise + """ + Compute the vectorized membership of ``x in y`` if possible, otherwise use Python. """ try: @@ -263,7 +265,8 @@ def _in(x, y): def _not_in(x, y): - """Compute the vectorized membership of ``x not in y`` if possible, + """ + Compute the vectorized membership of ``x not in y`` if possible, otherwise use Python. """ try: @@ -445,7 +448,8 @@ def evaluate(self, env, engine: str, parser, term_type, eval_in_python): return term_type(name, env=env) def convert_values(self): - """Convert datetimes to a comparable value in an expression. + """ + Convert datetimes to a comparable value in an expression. """ def stringify(value): diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index ce213c8532834..92a2c20cd2a9e 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -1,4 +1,5 @@ -""":func:`~pandas.eval` source string parsing functions +""" +:func:`~pandas.eval` source string parsing functions """ from io import StringIO diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 097c3c22aa6c3..828ec11c2bd38 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -149,7 +149,8 @@ def is_valid(self) -> bool: @property def is_in_table(self) -> bool: - """ return True if this is a valid column name for generation (e.g. an + """ + return True if this is a valid column name for generation (e.g. an actual column in the table) """ return self.queryables.get(self.lhs) is not None @@ -175,7 +176,8 @@ def generate(self, v) -> str: return f"({self.lhs} {self.op} {val})" def convert_value(self, v) -> "TermValue": - """ convert the expression that is in the term to something that is + """ + convert the expression that is in the term to something that is accepted by pytables """ diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 70dcf4defdb52..937c81fdeb8d6 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -31,7 +31,8 @@ def ensure_scope( def _replacer(x) -> str: - """Replace a number with its hexadecimal representation. Used to tag + """ + Replace a number with its hexadecimal representation. Used to tag temporary variables with their calling scope's id. """ # get the hex repr of the binary char and remove 0x and pad by pad_size diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 011c09c9ca1ef..1c969d40c2c7f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -105,7 +105,8 @@ def is_nested_object(obj) -> bool: def maybe_downcast_to_dtype(result, dtype): - """ try to cast to the specified dtype (e.g. convert back to bool/int + """ + try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ do_round = False @@ -750,7 +751,8 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): def invalidate_string_dtypes(dtype_set): - """Change string like dtypes to object for + """ + Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ non_string_dtypes = dtype_set - {np.dtype("S").type, np.dtype(" bool: def concat_categorical(to_concat, axis: int = 0): - """Concatenate an object/categorical array of arrays, each of which is a + """ + Concatenate an object/categorical array of arrays, each of which is a single dtype Parameters diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1a16f8792e9e7..ec72541128708 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -281,21 +281,24 @@ def _validate_dtype(self, dtype): @property def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: - """Used when a manipulation result has the same dimensions as the + """ + Used when a manipulation result has the same dimensions as the original. """ raise AbstractMethodError(self) @property def _constructor_sliced(self): - """Used when a manipulation result has one lower dimension(s) as the + """ + Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ raise AbstractMethodError(self) @property def _constructor_expanddim(self): - """Used when a manipulation result has one higher dimension as the + """ + Used when a manipulation result has one higher dimension as the original, such as Series.to_frame() """ raise NotImplementedError @@ -346,7 +349,8 @@ def _construct_axes_dict(self, axes=None, **kwargs): def _construct_axes_from_arguments( self, args, kwargs, require_all: bool = False, sentinel=None ): - """Construct and returns axes if supplied in args/kwargs. + """ + Construct and returns axes if supplied in args/kwargs. If require_all, raise if all axis arguments are not supplied return a tuple of (axes, kwargs). @@ -1735,7 +1739,8 @@ def keys(self): return self._info_axis def items(self): - """Iterate over (label, values) on info axis + """ + Iterate over (label, values) on info axis This is index for Series and columns for DataFrame. @@ -3115,18 +3120,22 @@ def to_csv( # Lookup Caching def _set_as_cached(self, item, cacher) -> None: - """Set the _cacher attribute on the calling object with a weakref to + """ + Set the _cacher attribute on the calling object with a weakref to cacher. """ self._cacher = (item, weakref.ref(cacher)) def _reset_cacher(self) -> None: - """Reset the cacher.""" + """ + Reset the cacher. + """ if hasattr(self, "_cacher"): del self._cacher def _maybe_cache_changed(self, item, value) -> None: - """The object has called back to us saying maybe it has changed. + """ + The object has called back to us saying maybe it has changed. """ self._data.set(item, value) @@ -5073,7 +5082,8 @@ def __finalize__( return self def __getattr__(self, name: str): - """After regular attribute access, try looking up the name + """ + After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ # Note: obj.x will always call obj.__getattribute__('x') prior to @@ -5091,7 +5101,8 @@ def __getattr__(self, name: str): return object.__getattribute__(self, name) def __setattr__(self, name: str, value) -> None: - """After regular attribute access, try setting the name + """ + After regular attribute access, try setting the name This allows simpler access to columns for interactive use. """ # first try regular attribute access via __getattribute__, so that @@ -5131,7 +5142,8 @@ def __setattr__(self, name: str, value) -> None: object.__setattr__(self, name, value) def _dir_additions(self): - """ add the string-like attributes from the info_axis. + """ + add the string-like attributes from the info_axis. If info_axis is a MultiIndex, it's first level values are used. """ additions = { @@ -5145,7 +5157,8 @@ def _dir_additions(self): # Consolidation of internals def _protect_consolidate(self, f): - """Consolidate _data -- if the blocks have changed, then clear the + """ + Consolidate _data -- if the blocks have changed, then clear the cache """ blocks_before = len(self._data.blocks) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index bb62d500311df..adb2ed9211bfe 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -519,7 +519,8 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): return new_target, indexer def _reindex_non_unique(self, target): - """ reindex from a non-unique; which CategoricalIndex's are almost + """ + reindex from a non-unique; which CategoricalIndex's are almost always """ new_target, indexer = self.reindex(target) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cb8b9cc04fc24..46017377f2b9c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -85,7 +85,8 @@ class IndexingError(Exception): class IndexingMixin: - """Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. + """ + Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. """ @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7d6ef11719b3a..34fa4c0e6544e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -256,7 +256,8 @@ def mgr_locs(self, new_mgr_locs): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return self.dtype @@ -374,7 +375,8 @@ def delete(self, loc): self.mgr_locs = self.mgr_locs.delete(loc) def apply(self, func, **kwargs) -> List["Block"]: - """ apply the function to my values; return a block if we are not + """ + apply the function to my values; return a block if we are not one """ with np.errstate(all="ignore"): @@ -400,7 +402,8 @@ def _split_op_result(self, result) -> List["Block"]: return [result] def fillna(self, value, limit=None, inplace=False, downcast=None): - """ fillna on the block with the value. If we fail, then convert to + """ + fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -648,7 +651,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! """ @@ -693,7 +697,8 @@ def copy(self, deep=True): def replace( self, to_replace, value, inplace=False, filter=None, regex=False, convert=True ): - """replace the to_replace value with value, possible to create new + """ + replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility. """ @@ -913,7 +918,8 @@ def setitem(self, indexer, value): return block def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): - """ putmask the data to the block; it is possible that we may create a + """ + putmask the data to the block; it is possible that we may create a new dtype of block return the resulting block(s) @@ -1446,7 +1452,8 @@ def equals(self, other) -> bool: return array_equivalent(self.values, other.values) def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): - """Return a list of unstacked blocks of self + """ + Return a list of unstacked blocks of self Parameters ---------- @@ -1584,7 +1591,8 @@ class NonConsolidatableMixIn: _validate_ndim = False def __init__(self, values, placement, ndim=None): - """Initialize a non-consolidatable block. + """ + Initialize a non-consolidatable block. 'ndim' may be inferred from 'placement'. @@ -1699,7 +1707,8 @@ def _get_unstack_items(self, unstacker, new_columns): class ExtensionBlock(NonConsolidatableMixIn, Block): - """Block for holding extension types. + """ + Block for holding extension types. Notes ----- @@ -1757,7 +1766,8 @@ def is_numeric(self): return self.values.dtype._is_numeric def setitem(self, indexer, value): - """Set the value inplace, returning a same-typed block. + """ + Set the value inplace, returning a same-typed block. This differs from Block.setitem by not allowing setitem to change the dtype of the Block. @@ -2291,7 +2301,8 @@ def _holder(self): return DatetimeArray def _maybe_coerce_values(self, values): - """Input validation for values passed to __init__. Ensure that + """ + Input validation for values passed to __init__. Ensure that we have datetime64TZ, coercing if necessary. Parameters @@ -2580,7 +2591,8 @@ def __init__(self, values, placement=None, ndim=2): @property def is_bool(self): - """ we can be a bool if we have only bool values but are of type + """ + we can be a bool if we have only bool values but are of type object """ return lib.is_bool_array(self.values.ravel()) @@ -2593,7 +2605,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy of + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! can return multiple blocks! @@ -2886,7 +2899,8 @@ def _holder(self): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return np.object_ diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9dd4312a39525..57ed2555761be 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -534,7 +534,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of dicts to numpy arrays + """ + Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records - for OrderedDict and dicts, the column names match diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fb20b5e89ccf3..69ceb95985140 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1756,7 +1756,8 @@ def form_blocks(arrays, names, axes): def _simple_blockify(tuples, dtype): - """ return a single array of a block that has a single dtype; if dtype is + """ + return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """ values, placement = _stack_arrays(tuples, dtype) @@ -1815,7 +1816,8 @@ def _shape_compat(x): def _interleaved_dtype( blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: - """Find the common dtype for `blocks`. + """ + Find the common dtype for `blocks`. Parameters ---------- diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6115c4af41b25..a5c609473760d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -652,7 +652,8 @@ def _get_counts_nanvar( ddof: int, dtype: Dtype = float, ) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]: - """ Get the count of non-null values along an axis, accounting + """ + Get the count of non-null values along an axis, accounting for degrees of freedom. Parameters @@ -956,7 +957,8 @@ def nanskew( skipna: bool = True, mask: Optional[np.ndarray] = None, ) -> float: - """ Compute the sample skewness. + """ + Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly @@ -1194,7 +1196,8 @@ def _get_counts( axis: Optional[int], dtype: Dtype = float, ) -> Union[int, np.ndarray]: - """ Get the count of non-null values along an axis + """ + Get the count of non-null values along an axis Parameters ---------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index e499158a13b0c..86417faf6cd11 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -587,7 +587,8 @@ def _round_frac(x, precision: int): def _infer_precision(base_precision: int, bins) -> int: - """Infer an appropriate precision for _round_frac + """ + Infer an appropriate precision for _round_frac """ for precision in range(base_precision, 20): levels = [_round_frac(b, precision) for b in bins] diff --git a/pandas/io/common.py b/pandas/io/common.py index beb6c9d97aff3..c52583eed27ec 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -74,8 +74,9 @@ def is_url(url) -> bool: def _expand_user( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Return the argument with an initial component of ~ or ~user - replaced by that user's home directory. + """ + Return the argument with an initial component of ~ or ~user + replaced by that user's home directory. Parameters ---------- @@ -103,7 +104,8 @@ def validate_header_arg(header) -> None: def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Attempt to convert a path-like object to a string. + """ + Attempt to convert a path-like object to a string. Parameters ---------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 70c09151258ff..97959bd125113 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -721,7 +721,8 @@ def _get_sheet_name(self, sheet_name): return sheet_name def _value_with_fmt(self, val): - """Convert numpy types to Python types for the Excel writers. + """ + Convert numpy types to Python types for the Excel writers. Parameters ---------- @@ -755,7 +756,8 @@ def _value_with_fmt(self, val): @classmethod def check_extension(cls, ext): - """checks that path's extension against the Writer's supported + """ + checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError. """ if ext.startswith("."): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index ec5f6fcb17ff8..7af776dc1a10f 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -64,7 +64,8 @@ def get_sheet_by_name(self, name: str): raise ValueError(f"sheet {name} not found") def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - """Parse an ODF Table into a list of lists + """ + Parse an ODF Table into a list of lists """ from odf.table import CoveredTableCell, TableCell, TableRow @@ -120,7 +121,8 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: return table def _get_row_repeat(self, row) -> int: - """Return number of times this row was repeated + """ + Return number of times this row was repeated Repeating an empty row appeared to be a common way of representing sparse rows in the table. """ @@ -134,7 +136,8 @@ def _get_column_repeat(self, cell) -> int: return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) def _is_empty_row(self, row) -> bool: - """Helper function to find empty rows + """ + Helper function to find empty rows """ for column in row.childNodes: if len(column.childNodes) > 0: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index d35d466e6c5c9..a96c0f814e2d8 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -468,7 +468,8 @@ def write_cells( class _OpenpyxlReader(_BaseExcelReader): def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None: - """Reader using openpyxl engine. + """ + Reader using openpyxl engine. Parameters ---------- diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index a33406b6e80d7..c8d40d7141fc8 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -171,7 +171,8 @@ def _trim_excel_header(row): def _fill_mi_header(row, control_row): - """Forward fill blank entries in row but only inside the same parent index. + """ + Forward fill blank entries in row but only inside the same parent index. Used for creating headers in Multiindex. diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 16f800a6de2c9..8f7d3b1368fc7 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -9,7 +9,8 @@ class _XlrdReader(_BaseExcelReader): def __init__(self, filepath_or_buffer): - """Reader using xlrd engine. + """ + Reader using xlrd engine. Parameters ---------- diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index d102a885cef0a..78efe77e9fe2d 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -80,7 +80,8 @@ def write_cells( def _style_to_xlwt( cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" ) -> str: - """helper which recursively generate an xlwt easy style string + """ + helper which recursively generate an xlwt easy style string for example: hstyle = {"font": {"bold": True}, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 28a069bc9fc1b..aac1df5dcd396 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -41,7 +41,8 @@ def __init__( class CSSToExcelConverter: - """A callable for converting CSS declarations to ExcelWriter styles + """ + A callable for converting CSS declarations to ExcelWriter styles Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), focusing on font styling, backgrounds, borders and alignment. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 55d534f975b68..0693c083c9ddc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1832,7 +1832,8 @@ def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False) self.use_eng_prefix = use_eng_prefix def __call__(self, num: Union[int, float]) -> str: - """ Formats a number in engineering notation, appending a letter + """ + Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: >>> format_eng(0) # for self.accuracy = 0 @@ -1930,7 +1931,8 @@ def _binify(cols: List[int], line_width: int) -> List[int]: def get_level_lengths( levels: Any, sentinel: Union[bool, object, str] = "" ) -> List[Dict[int, int]]: - """For each index in each level the function returns lengths of indexes. + """ + For each index in each level the function returns lengths of indexes. Parameters ---------- diff --git a/pandas/io/html.py b/pandas/io/html.py index ee8e96b4b3344..561570f466b68 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -600,7 +600,8 @@ def _build_doc(self): def _build_xpath_expr(attrs) -> str: - """Build an xpath expression to simulate bs4's ability to pass in kwargs to + """ + Build an xpath expression to simulate bs4's ability to pass in kwargs to search for attributes when using the lxml parser. Parameters diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1390d2d514a5e..048aa8b1915d1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3224,7 +3224,8 @@ def is_multi_index(self) -> bool: return isinstance(self.levels, list) def validate_multiindex(self, obj): - """validate that we can store the multi-index; reset and return the + """ + validate that we can store the multi-index; reset and return the new object """ levels = [ @@ -3375,7 +3376,8 @@ def validate_version(self, where=None): warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): - """validate the min_itemsize doesn't contain items that are not in the + """ + validate the min_itemsize doesn't contain items that are not in the axes this needs data_columns to be defined """ if min_itemsize is None: @@ -3587,7 +3589,8 @@ def get_object(cls, obj, transposed: bool): return obj def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): - """take the input data_columns and min_itemize and create a data + """ + take the input data_columns and min_itemize and create a data columns spec """ if not len(non_index_axes): @@ -3999,7 +4002,8 @@ def create_description( def read_coordinates( self, where=None, start: Optional[int] = None, stop: Optional[int] = None, ): - """select coordinates (row numbers) from a table; return the + """ + select coordinates (row numbers) from a table; return the coordinates object """ # validate the version @@ -4028,7 +4032,8 @@ def read_column( start: Optional[int] = None, stop: Optional[int] = None, ): - """return a single column from the table, generally only indexables + """ + return a single column from the table, generally only indexables are interesting """ # validate the version diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 69e5a973ff706..e7120b1f6da08 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -653,7 +653,8 @@ def create(self): self._execute_create() def _execute_insert(self, conn, keys, data_iter): - """Execute SQL statement inserting data + """ + Execute SQL statement inserting data Parameters ---------- @@ -667,7 +668,8 @@ def _execute_insert(self, conn, keys, data_iter): conn.execute(self.table.insert(), data) def _execute_insert_multi(self, conn, keys, data_iter): - """Alternative to _execute_insert for DBs support multivalue INSERT. + """ + Alternative to _execute_insert for DBs support multivalue INSERT. Note: multi-value insert is usually faster for analytics DBs and tables containing a few columns @@ -1092,7 +1094,8 @@ def read_table( schema=None, chunksize=None, ): - """Read SQL database table into a DataFrame. + """ + Read SQL database table into a DataFrame. Parameters ---------- @@ -1168,7 +1171,8 @@ def read_query( params=None, chunksize=None, ): - """Read SQL query into a DataFrame. + """ + Read SQL query into a DataFrame. Parameters ---------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index cf3251faae979..593228e99477b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -482,7 +482,8 @@ class InvalidColumnName(Warning): def _cast_to_stata_types(data: DataFrame) -> DataFrame: - """Checks the dtypes of the columns of a pandas DataFrame for + """ + Checks the dtypes of the columns of a pandas DataFrame for compatibility with the data types and ranges supported by Stata, and converts if necessary. @@ -2128,7 +2129,8 @@ def _write_bytes(self, value: bytes) -> None: self._file.write(value) def _prepare_categoricals(self, data: DataFrame) -> DataFrame: - """Check for categorical columns, retain categorical information for + """ + Check for categorical columns, retain categorical information for Stata file and convert categorical data to int """ is_cat = [is_categorical_dtype(data[col]) for col in data] @@ -2170,7 +2172,8 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: def _replace_nans(self, data: DataFrame) -> DataFrame: # return data - """Checks floating point data columns for nans, and replaces these with + """ + Checks floating point data columns for nans, and replaces these with the generic Stata for missing value (.) """ for c in data: @@ -3035,7 +3038,8 @@ def _write_header( self._write_bytes(self._tag(bio.read(), "header")) def _write_map(self) -> None: - """Called twice during file write. The first populates the values in + """ + Called twice during file write. The first populates the values in the map with 0s. The second call writes the final map locations when all blocks have been written. """ @@ -3185,7 +3189,8 @@ def _write_file_close_tag(self) -> None: self._update_map("end-of-file") def _update_strl_names(self) -> None: - """Update column names for conversion to strl if they might have been + """ + Update column names for conversion to strl if they might have been changed to comply with Stata naming rules """ # Update convert_strl if names changed @@ -3195,7 +3200,8 @@ def _update_strl_names(self) -> None: self._convert_strl[idx] = new def _convert_strls(self, data: DataFrame) -> DataFrame: - """Convert columns to StrLs if either very large or in the + """ + Convert columns to StrLs if either very large or in the convert_strl variable """ convert_cols = [ diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index dafdd6eecabc0..5743288982da4 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -100,7 +100,8 @@ def _subplots( layout_type="box", **fig_kw, ): - """Create a figure with a set of subplots already made. + """ + Create a figure with a set of subplots already made. This utility wrapper makes it convenient to create common layouts of subplots, including the enclosing figure object, in a single call. diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index b67ca4cfab83d..cd4b43c83340f 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -1,4 +1,5 @@ -"""Rudimentary Apache Arrow-backed ExtensionArray. +""" +Rudimentary Apache Arrow-backed ExtensionArray. At the moment, just a boolean array / type is implemented. Eventually, we'll want to parametrize the type and support diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index e2b6ea0304f6a..323cb843b2d74 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -1,4 +1,5 @@ -"""Base test suite for extension arrays. +""" +Base test suite for extension arrays. These tests are intended for third-party libraries to subclass to validate that their extension arrays and dtypes satisfy the interface. Moving or diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 0609f19c8e0c3..4009041218ac2 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -51,7 +51,8 @@ def _check_divmod_op(self, s, op, other, exc=Exception): class BaseArithmeticOpsTests(BaseOpsUtil): - """Various Series and DataFrame arithmetic ops methods. + """ + Various Series and DataFrame arithmetic ops methods. Subclasses supporting various ops should set the class variables to indicate that they support ops of that kind diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index d37638d37e4d6..1942d737780da 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -13,7 +13,8 @@ def dtype(): @pytest.fixture def data(): - """Length-100 array for this type. + """ + Length-100 array for this type. * data[0] and data[1] should both be non missing * data[0] and data[1] should not be equal @@ -67,7 +68,8 @@ def gen(count): @pytest.fixture def data_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, C, A] with A < B < C @@ -77,7 +79,8 @@ def data_for_sorting(): @pytest.fixture def data_missing_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, NA, A] with A < B and NA missing. @@ -87,7 +90,8 @@ def data_missing_for_sorting(): @pytest.fixture def na_cmp(): - """Binary operator for comparing NA values. + """ + Binary operator for comparing NA values. Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. @@ -105,7 +109,8 @@ def na_value(): @pytest.fixture def data_for_grouping(): - """Data for factorization, grouping, and unique tests. + """ + Data for factorization, grouping, and unique tests. Expected to be like [B, B, NA, NA, A, A, B, C] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index e6b147e7a4ce7..a229a824d0f9b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -1,4 +1,5 @@ -"""Test extension array for storing nested data in a pandas container. +""" +Test extension array for storing nested data in a pandas container. The JSONArray stores lists of dictionaries. The storage mechanism is a list, not an ndarray. diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index ebac36c5f8c78..1214734358c80 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -107,7 +107,8 @@ def three_group(): @pytest.fixture(params=sorted(reduction_kernels)) def reduction_func(request): - """yields the string names of all groupby reduction functions, one at a time. + """ + yields the string names of all groupby reduction functions, one at a time. """ return request.param diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 9d55609d5db00..9cc031001f81c 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -101,7 +101,8 @@ def setup_method(self, method): setattr(self, kind, d) def generate_indices(self, f, values=False): - """ generate the indices + """ + generate the indices if values is True , use the axis values is False, use the range """ diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py index 48e090b242208..0256f5e35e1db 100644 --- a/pandas/tests/indexing/multiindex/conftest.py +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -20,7 +20,8 @@ def multiindex_dataframe_random_data(): @pytest.fixture def multiindex_year_month_day_dataframe_random_data(): - """DataFrame with 3 level MultiIndex (year, month, day) covering + """ + DataFrame with 3 level MultiIndex (year, month, day) covering first 100 business days from 2000-01-01 with random data """ tdf = tm.makeTimeDataFrame(100) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 7810778602e12..fe71ca77a7dda 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -27,7 +27,8 @@ def salaries_table(datapath): @pytest.fixture def s3_resource(tips_file, jsonl_file): - """Fixture for mocking S3 interaction. + """ + Fixture for mocking S3 interaction. The primary bucket name is "pandas-test". The following datasets are loaded. diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index 7f0b3ab7957e6..aad18890de3ad 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -74,7 +74,8 @@ def ensure_clean_path(path): def _maybe_remove(store, key): - """For tests using tables, try removing the table to be sure there is + """ + For tests using tables, try removing the table to be sure there is no content from previous tests using the same table name. """ try: diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index a4ac15d9f3b07..d5b71a6e4cee1 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -98,7 +98,8 @@ def _index_name(): @pytest.fixture def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): - """Fixture for parametrization of date_range, period_range and + """ + Fixture for parametrization of date_range, period_range and timedelta_range indexes """ return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name) @@ -106,7 +107,8 @@ def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): @pytest.fixture def _static_values(index): - """Fixture for parametrization of values used in parametrization of + """ + Fixture for parametrization of values used in parametrization of Series and DataFrames with date_range, period_range and timedelta_range indexes """ @@ -115,7 +117,8 @@ def _static_values(index): @pytest.fixture def _series_name(): - """Fixture for parametrization of Series name for Series used with + """ + Fixture for parametrization of Series name for Series used with date_range, period_range and timedelta_range indexes """ return None @@ -123,7 +126,8 @@ def _series_name(): @pytest.fixture def series(index, _series_name, _static_values): - """Fixture for parametrization of Series with date_range, period_range and + """ + Fixture for parametrization of Series with date_range, period_range and timedelta_range indexes """ return Series(_static_values, index=index, name=_series_name) @@ -131,7 +135,8 @@ def series(index, _series_name, _static_values): @pytest.fixture def empty_series(series): - """Fixture for parametrization of empty Series with date_range, + """ + Fixture for parametrization of empty Series with date_range, period_range and timedelta_range indexes """ return series[:0] @@ -139,7 +144,8 @@ def empty_series(series): @pytest.fixture def frame(index, _series_name, _static_values): - """Fixture for parametrization of DataFrame with date_range, period_range + """ + Fixture for parametrization of DataFrame with date_range, period_range and timedelta_range indexes """ # _series_name is intentionally unused @@ -148,7 +154,8 @@ def frame(index, _series_name, _static_values): @pytest.fixture def empty_frame(series): - """Fixture for parametrization of empty DataFrame with date_range, + """ + Fixture for parametrization of empty DataFrame with date_range, period_range and timedelta_range indexes """ index = series.index[:0] @@ -157,7 +164,8 @@ def empty_frame(series): @pytest.fixture(params=[Series, DataFrame]) def series_and_frame(request, series, frame): - """Fixture for parametrization of Series and DataFrame with date_range, + """ + Fixture for parametrization of Series and DataFrame with date_range, period_range and timedelta_range indexes """ if request.param == Series: diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index bfcfd1c5a7101..682575cc9ed48 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -216,7 +216,8 @@ def validate_bool_kwarg(value, arg_name): def validate_axis_style_args(data, args, kwargs, arg_name, method_name): - """Argument handler for mixed index, columns / axis functions + """ + Argument handler for mixed index, columns / axis functions In an attempt to handle both `.method(index, columns)`, and `.method(arg, axis=.)`, we have to do some bad things to argument @@ -310,7 +311,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): - """Validate the keyword arguments to 'fillna'. + """ + Validate the keyword arguments to 'fillna'. This checks that exactly one of 'value' and 'method' is specified. If 'method' is specified, this validates that it's a valid method.