From bddbef4a86e097d8c134bfc9b398c864ee8e83c5 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Feb 2020 19:11:42 +0000 Subject: [PATCH 1/4] D213: Multi-line docstring summary should start at the second line --- pandas/_config/config.py | 9 ++-- pandas/_testing.py | 33 ++++++++----- pandas/compat/chainmap.py | 3 +- pandas/core/aggregation.py | 3 +- pandas/core/arrays/base.py | 9 ++-- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/integer.py | 6 ++- pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/sparse/scipy_sparse.py | 6 ++- pandas/core/computation/ops.py | 12 +++-- pandas/core/computation/parsing.py | 3 +- pandas/core/computation/pytables.py | 9 ++-- pandas/core/computation/scope.py | 3 +- pandas/core/dtypes/cast.py | 9 ++-- pandas/core/dtypes/concat.py | 3 +- pandas/core/generic.py | 36 +++++++++----- pandas/core/indexes/category.py | 3 +- pandas/core/indexing.py | 3 +- pandas/core/internals/blocks.py | 45 +++++++++++------ pandas/core/internals/construction.py | 3 +- pandas/core/internals/managers.py | 6 ++- pandas/core/nanops.py | 9 ++-- pandas/core/reshape/tile.py | 3 +- pandas/io/common.py | 6 ++- pandas/io/excel/_base.py | 6 ++- pandas/io/excel/_odfreader.py | 9 ++-- pandas/io/excel/_openpyxl.py | 3 +- pandas/io/excel/_util.py | 3 +- pandas/io/excel/_xlrd.py | 6 ++- pandas/io/excel/_xlwt.py | 3 +- pandas/io/formats/excel.py | 3 +- pandas/io/formats/format.py | 9 ++-- pandas/io/html.py | 3 +- pandas/io/parsers.py | 3 +- pandas/io/pytables.py | 51 +++++++++++++------- pandas/io/sas/sas7bdat.py | 3 +- pandas/io/sql.py | 12 +++-- pandas/io/stata.py | 18 ++++--- pandas/plotting/_matplotlib/core.py | 3 +- pandas/plotting/_matplotlib/tools.py | 3 +- pandas/tests/extension/arrow/arrays.py | 3 +- pandas/tests/extension/base/__init__.py | 3 +- pandas/tests/extension/base/ops.py | 3 +- pandas/tests/extension/conftest.py | 15 ++++-- pandas/tests/extension/json/array.py | 3 +- pandas/tests/groupby/conftest.py | 3 +- pandas/tests/indexing/common.py | 3 +- pandas/tests/indexing/multiindex/conftest.py | 3 +- pandas/tests/io/conftest.py | 3 +- pandas/tests/io/pytables/common.py | 3 +- pandas/tests/resample/conftest.py | 24 ++++++--- pandas/util/_validators.py | 6 ++- 54 files changed, 295 insertions(+), 148 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 8b6116d3abd60..885b141ae9143 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -82,7 +82,8 @@ class OptionError(AttributeError, KeyError): - """Exception for pandas.options, backwards compatible with KeyError + """ + Exception for pandas.options, backwards compatible with KeyError checks """ @@ -546,7 +547,8 @@ def deprecate_option( def _select_options(pat: str) -> List[str]: - """returns a list of keys matching `pat` + """ + returns a list of keys matching `pat` if pat=="all", returns all registered options """ @@ -716,7 +718,8 @@ def pp(name: str, ks: Iterable[str]) -> List[str]: @contextmanager def config_prefix(prefix): - """contextmanager for multiple invocations of API with a common prefix + """ + contextmanager for multiple invocations of API with a common prefix supported API functions: (register / get / set )__option diff --git a/pandas/_testing.py b/pandas/_testing.py index 13af8703cef93..a13e404aead59 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -743,7 +743,8 @@ def repr_class(x): def assert_attr_equal(attr, left, right, obj="Attributes"): - """checks attributes are equal. Both objects must have attribute. + """ + checks attributes are equal. Both objects must have attribute. Parameters ---------- @@ -820,7 +821,8 @@ def assert_is_sorted(seq): def assert_categorical_equal( left, right, check_dtype=True, check_category_order=True, obj="Categorical" ): - """Test that Categoricals are equivalent. + """ + Test that Categoricals are equivalent. Parameters ---------- @@ -860,7 +862,8 @@ def assert_categorical_equal( def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): - """Test that two IntervalArrays are equivalent. + """ + Test that two IntervalArrays are equivalent. Parameters ---------- @@ -1009,7 +1012,8 @@ def _raise(left, right, err_msg): def assert_extension_array_equal( left, right, check_dtype=True, check_less_precise=False, check_exact=False ): - """Check that left and right ExtensionArrays are equal. + """ + Check that left and right ExtensionArrays are equal. Parameters ---------- @@ -1489,7 +1493,8 @@ def assert_sp_array_equal( check_fill_value=True, consolidate_block_indices=False, ): - """Check that the left and right SparseArray are equal. + """ + Check that the left and right SparseArray are equal. Parameters ---------- @@ -1725,7 +1730,8 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None) def all_index_generator(k=10): - """Generator which can be iterated over to get instances of all the various + """ + Generator which can be iterated over to get instances of all the various index classes. Parameters @@ -1764,7 +1770,8 @@ def index_subclass_makers_generator(): def all_timeseries_index_generator(k=10): - """Generator which can be iterated over to get instances of all the classes + """ + Generator which can be iterated over to get instances of all the classes which represent time-series. Parameters @@ -1855,7 +1862,8 @@ def makePeriodFrame(nper=None): def makeCustomIndex( nentries, nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None ): - """Create an index/multindex with given dimensions, levels, names, etc' + """ + Create an index/multindex with given dimensions, levels, names, etc' nentries - number of entries in index nlevels - number of levels (> 1 produces multindex) @@ -2143,7 +2151,8 @@ def makeMissingDataframe(density=0.9, random_state=None): def optional_args(decorator): - """allows a decorator to take optional positional and keyword arguments. + """ + allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that it is decorating a function, i.e. something like this:: @@ -2214,7 +2223,8 @@ def _get_default_network_errors(): def can_connect(url, error_classes=None): - """Try to connect to the given url. True if succeeds, False if IOError + """ + Try to connect to the given url. True if succeeds, False if IOError raised Parameters @@ -2584,7 +2594,8 @@ def use_numexpr(use, min_elements=None): def test_parallel(num_threads=2, kwargs_list=None): - """Decorator to run the same function multiple times in parallel. + """ + Decorator to run the same function multiple times in parallel. Parameters ---------- diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py index 588bd24ddf797..a84dbb4a661e4 100644 --- a/pandas/compat/chainmap.py +++ b/pandas/compat/chainmap.py @@ -5,7 +5,8 @@ class DeepChainMap(ChainMap[_KT, _VT]): - """Variant of ChainMap that allows direct updates to inner scopes. + """ + Variant of ChainMap that allows direct updates to inner scopes. Only works when all passed mapping are mutable. """ diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 79b87f146b9a7..448f84d58d7a0 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -98,7 +98,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i def _make_unique_kwarg_list( seq: Sequence[Tuple[Any, Any]] ) -> Sequence[Tuple[Any, Any]]: - """Uniquify aggfunc name of the pairs in the order list + """ + Uniquify aggfunc name of the pairs in the order list Examples: -------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c3c91cea43f6b..b5da6d4c11616 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,5 @@ -"""An interface for extending pandas with custom arrays. +""" +An interface for extending pandas with custom arrays. .. warning:: @@ -213,7 +214,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): - """Construct a new ExtensionArray from a sequence of strings. + """ + Construct a new ExtensionArray from a sequence of strings. .. versionadded:: 0.24.0 @@ -961,7 +963,8 @@ def __repr__(self) -> str: return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: - """Formatting function for scalar values. + """ + Formatting function for scalar values. This is used in the default '__repr__'. The returned formatting function receives instances of your scalar type. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d26ff7490e714..f9afc68ae4ff9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1888,7 +1888,8 @@ def __contains__(self, key) -> bool: return contains(self, key, container=self._codes) def _tidy_repr(self, max_vals=10, footer=True) -> str: - """ a short repr displaying only max_vals and an optional (but default + """ + a short repr displaying only max_vals and an optional (but default footer) """ num = max_vals // 2 diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 03c8e48c6e699..5190df8451212 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -134,7 +134,8 @@ def _simple_new(cls, values, **kwargs): @property def _scalar_type(self) -> Type[DatetimeLikeScalar]: - """The scalar associated with this datelike + """ + The scalar associated with this datelike * PeriodArray : Period * DatetimeArray : Timestamp diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5888600d2fa8e..6701396ea10bd 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -819,7 +819,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): # noqa """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4bfd5f5770b69..febbb555c5821 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -479,7 +479,8 @@ def astype(self, dtype, copy=True): @property def _ndarray_values(self) -> np.ndarray: - """Internal pandas method for lossy conversion to a NumPy ndarray. + """ + Internal pandas method for lossy conversion to a NumPy ndarray. This method is not part of the pandas interface. @@ -494,7 +495,8 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: return self.to_numpy(na_value=np.nan), np.nan def _values_for_argsort(self) -> np.ndarray: - """Return values for sorting. + """ + Return values for sorting. Returns ------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0b35a031bc53f..ce05aa8586392 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -460,7 +460,8 @@ def from_tuples(cls, data, closed="right", copy=False, dtype=None): return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) def _validate(self): - """Verify that the IntervalArray is valid. + """ + Verify that the IntervalArray is valid. Checks that diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 17a953fce9ec0..e6edaa40a9fdf 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -17,7 +17,8 @@ def _check_is_partition(parts, whole): def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): - """ For arbitrary (MultiIndexed) sparse Series return + """ + For arbitrary (MultiIndexed) sparse Series return (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo constructor. """ # index and column levels must be a partition of the index @@ -44,7 +45,8 @@ def get_indexers(levels): # labels_to_i[:] = np.arange(labels_to_i.shape[0]) def _get_label_to_i_dict(labels, sort_labels=False): - """ Return dict of unique labels to number. + """ + Return dict of unique labels to number. Optionally sort by label. """ labels = Index(map(tuple, labels)).unique().tolist() # squish diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 5563d3ae27118..7ed089b283903 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -1,4 +1,5 @@ -"""Operator classes for eval. +""" +Operator classes for eval. """ from datetime import datetime @@ -248,7 +249,8 @@ def is_datetime(self) -> bool: def _in(x, y): - """Compute the vectorized membership of ``x in y`` if possible, otherwise + """ + Compute the vectorized membership of ``x in y`` if possible, otherwise use Python. """ try: @@ -263,7 +265,8 @@ def _in(x, y): def _not_in(x, y): - """Compute the vectorized membership of ``x not in y`` if possible, + """ + Compute the vectorized membership of ``x not in y`` if possible, otherwise use Python. """ try: @@ -445,7 +448,8 @@ def evaluate(self, env, engine: str, parser, term_type, eval_in_python): return term_type(name, env=env) def convert_values(self): - """Convert datetimes to a comparable value in an expression. + """ + Convert datetimes to a comparable value in an expression. """ def stringify(value): diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index ce213c8532834..92a2c20cd2a9e 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -1,4 +1,5 @@ -""":func:`~pandas.eval` source string parsing functions +""" +:func:`~pandas.eval` source string parsing functions """ from io import StringIO diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index be652ca0e6a36..ffb75c3a43ecb 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -150,7 +150,8 @@ def is_valid(self) -> bool: @property def is_in_table(self) -> bool: - """ return True if this is a valid column name for generation (e.g. an + """ + return True if this is a valid column name for generation (e.g. an actual column in the table) """ return self.queryables.get(self.lhs) is not None @@ -175,7 +176,8 @@ def generate(self, v) -> str: return f"({self.lhs} {self.op} {val})" def convert_value(self, v) -> "TermValue": - """ convert the expression that is in the term to something that is + """ + convert the expression that is in the term to something that is accepted by pytables """ def stringify(value): @@ -601,7 +603,8 @@ def __init__(self, value, converted, kind: str): self.kind = kind def tostring(self, encoding) -> str: - """ quote the string if not encoded + """ + quote the string if not encoded else encode and return """ if self.kind == "string": if encoding is not None: diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 70dcf4defdb52..937c81fdeb8d6 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -31,7 +31,8 @@ def ensure_scope( def _replacer(x) -> str: - """Replace a number with its hexadecimal representation. Used to tag + """ + Replace a number with its hexadecimal representation. Used to tag temporary variables with their calling scope's id. """ # get the hex repr of the binary char and remove 0x and pad by pad_size diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6120bc92adbfc..4fdde364925bc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -107,7 +107,8 @@ def is_nested_object(obj) -> bool: def maybe_downcast_to_dtype(result, dtype): - """ try to cast to the specified dtype (e.g. convert back to bool/int + """ + try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ do_round = False @@ -754,7 +755,8 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): def invalidate_string_dtypes(dtype_set): - """Change string like dtypes to object for + """ + Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ non_string_dtypes = dtype_set - {np.dtype("S").type, np.dtype(" bool: def concat_categorical(to_concat, axis: int = 0): - """Concatenate an object/categorical array of arrays, each of which is a + """ + Concatenate an object/categorical array of arrays, each of which is a single dtype Parameters diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 004b92176f030..36779fe2bdb13 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -277,21 +277,24 @@ def _validate_dtype(self, dtype): @property def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: - """Used when a manipulation result has the same dimensions as the + """ + Used when a manipulation result has the same dimensions as the original. """ raise AbstractMethodError(self) @property def _constructor_sliced(self): - """Used when a manipulation result has one lower dimension(s) as the + """ + Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ raise AbstractMethodError(self) @property def _constructor_expanddim(self): - """Used when a manipulation result has one higher dimension as the + """ + Used when a manipulation result has one higher dimension as the original, such as Series.to_frame() """ raise NotImplementedError @@ -342,7 +345,8 @@ def _construct_axes_dict(self, axes=None, **kwargs): def _construct_axes_from_arguments( self, args, kwargs, require_all: bool = False, sentinel=None ): - """Construct and returns axes if supplied in args/kwargs. + """ + Construct and returns axes if supplied in args/kwargs. If require_all, raise if all axis arguments are not supplied return a tuple of (axes, kwargs). @@ -1736,7 +1740,8 @@ def keys(self): return self._info_axis def items(self): - """Iterate over (label, values) on info axis + """ + Iterate over (label, values) on info axis This is index for Series and columns for DataFrame. @@ -3120,18 +3125,21 @@ def to_csv( # Lookup Caching def _set_as_cached(self, item, cacher) -> None: - """Set the _cacher attribute on the calling object with a weakref to + """ + Set the _cacher attribute on the calling object with a weakref to cacher. """ self._cacher = (item, weakref.ref(cacher)) def _reset_cacher(self) -> None: - """Reset the cacher.""" + """ + Reset the cacher.""" if hasattr(self, "_cacher"): del self._cacher def _maybe_cache_changed(self, item, value) -> None: - """The object has called back to us saying maybe it has changed. + """ + The object has called back to us saying maybe it has changed. """ self._data.set(item, value) @@ -5086,7 +5094,8 @@ def __finalize__( return self def __getattr__(self, name: str): - """After regular attribute access, try looking up the name + """ + After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ @@ -5105,7 +5114,8 @@ def __getattr__(self, name: str): return object.__getattribute__(self, name) def __setattr__(self, name: str, value) -> None: - """After regular attribute access, try setting the name + """ + After regular attribute access, try setting the name This allows simpler access to columns for interactive use. """ @@ -5146,7 +5156,8 @@ def __setattr__(self, name: str, value) -> None: object.__setattr__(self, name, value) def _dir_additions(self): - """ add the string-like attributes from the info_axis. + """ + add the string-like attributes from the info_axis. If info_axis is a MultiIndex, it's first level values are used. """ additions = { @@ -5160,7 +5171,8 @@ def _dir_additions(self): # Consolidation of internals def _protect_consolidate(self, f): - """Consolidate _data -- if the blocks have changed, then clear the + """ + Consolidate _data -- if the blocks have changed, then clear the cache """ blocks_before = len(self._data.blocks) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7373f41daefa4..9d0a2b93a9dfc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -520,7 +520,8 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): return new_target, indexer def _reindex_non_unique(self, target): - """ reindex from a non-unique; which CategoricalIndex's are almost + """ + reindex from a non-unique; which CategoricalIndex's are almost always """ new_target, indexer = self.reindex(target) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b2e5d04247e81..b2ae06d5dc3df 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -86,7 +86,8 @@ class IndexingError(Exception): class IndexingMixin: - """Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. + """ + Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. """ @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 536aa53c95fba..ef6b1932ab24e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -159,7 +159,8 @@ def _check_ndim(self, values, ndim): @property def _holder(self): - """The array-like that can hold the underlying values. + """ + The array-like that can hold the underlying values. None for 'Block', overridden by subclasses that don't use an ndarray. @@ -257,7 +258,8 @@ def mgr_locs(self, new_mgr_locs): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return self.dtype @@ -383,7 +385,8 @@ def delete(self, loc): self.mgr_locs = self.mgr_locs.delete(loc) def apply(self, func, **kwargs) -> List["Block"]: - """ apply the function to my values; return a block if we are not + """ + apply the function to my values; return a block if we are not one """ with np.errstate(all="ignore"): @@ -409,7 +412,8 @@ def _split_op_result(self, result) -> List["Block"]: return [result] def fillna(self, value, limit=None, inplace=False, downcast=None): - """ fillna on the block with the value. If we fail, then convert to + """ + fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -659,7 +663,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock here! """ @@ -705,7 +710,8 @@ def copy(self, deep=True): def replace( self, to_replace, value, inplace=False, filter=None, regex=False, convert=True ): - """replace the to_replace value with value, possible to create new + """ + replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. It is used in ObjectBlocks. It is here for API compatibility. """ @@ -926,7 +932,8 @@ def setitem(self, indexer, value): return block def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): - """ putmask the data to the block; it is possible that we may create a + """ + putmask the data to the block; it is possible that we may create a new dtype of block return the resulting block(s) @@ -1465,7 +1472,8 @@ def equals(self, other) -> bool: return array_equivalent(self.values, other.values) def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): - """Return a list of unstacked blocks of self + """ + Return a list of unstacked blocks of self Parameters ---------- @@ -1604,7 +1612,8 @@ class NonConsolidatableMixIn: _validate_ndim = False def __init__(self, values, placement, ndim=None): - """Initialize a non-consolidatable block. + """ + Initialize a non-consolidatable block. 'ndim' may be inferred from 'placement'. @@ -1719,7 +1728,8 @@ def _get_unstack_items(self, unstacker, new_columns): class ExtensionBlock(NonConsolidatableMixIn, Block): - """Block for holding extension types. + """ + Block for holding extension types. Notes ----- @@ -1777,7 +1787,8 @@ def is_numeric(self): return self.values.dtype._is_numeric def setitem(self, indexer, value): - """Set the value inplace, returning a same-typed block. + """ + Set the value inplace, returning a same-typed block. This differs from Block.setitem by not allowing setitem to change the dtype of the Block. @@ -2314,7 +2325,8 @@ def _holder(self): return DatetimeArray def _maybe_coerce_values(self, values): - """Input validation for values passed to __init__. Ensure that + """ + Input validation for values passed to __init__. Ensure that we have datetime64TZ, coercing if necessary. Parameters @@ -2604,7 +2616,8 @@ def __init__(self, values, placement=None, ndim=2): @property def is_bool(self): - """ we can be a bool if we have only bool values but are of type + """ + we can be a bool if we have only bool values but are of type object """ return lib.is_bool_array(self.values.ravel()) @@ -2617,7 +2630,8 @@ def convert( timedelta: bool = True, coerce: bool = False, ): - """ attempt to coerce any object types to better types return a copy of + """ + attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! can return multiple blocks! @@ -2911,7 +2925,8 @@ def _holder(self): @property def array_dtype(self): - """ the dtype to return if I want to construct this block as an + """ + the dtype to return if I want to construct this block as an array """ return np.object_ diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 798386825d802..7bb96e2fe191a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -535,7 +535,8 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): - """Convert list of dicts to numpy arrays + """ + Convert list of dicts to numpy arrays if `columns` is not passed, column names are inferred from the records - for OrderedDict and dicts, the column names match diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 08ae0b02169d4..9911367832ef7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1762,7 +1762,8 @@ def form_blocks(arrays, names, axes): def _simple_blockify(tuples, dtype): - """ return a single array of a block that has a single dtype; if dtype is + """ + return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """ values, placement = _stack_arrays(tuples, dtype) @@ -1822,7 +1823,8 @@ def _shape_compat(x): def _interleaved_dtype( blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: - """Find the common dtype for `blocks`. + """ + Find the common dtype for `blocks`. Parameters ---------- diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2bf2be082f639..266b8139bd36a 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -654,7 +654,8 @@ def _get_counts_nanvar( ddof: int, dtype: Dtype = float, ) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]: - """ Get the count of non-null values along an axis, accounting + """ + Get the count of non-null values along an axis, accounting for degrees of freedom. Parameters @@ -959,7 +960,8 @@ def nanskew( skipna: bool = True, mask: Optional[np.ndarray] = None, ) -> float: - """ Compute the sample skewness. + """ + Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly @@ -1197,7 +1199,8 @@ def _get_counts( axis: Optional[int], dtype: Dtype = float, ) -> Union[int, np.ndarray]: - """ Get the count of non-null values along an axis + """ + Get the count of non-null values along an axis Parameters ---------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index a18b45a077be0..0253459b254ae 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -589,7 +589,8 @@ def _round_frac(x, precision: int): def _infer_precision(base_precision: int, bins) -> int: - """Infer an appropriate precision for _round_frac + """ + Infer an appropriate precision for _round_frac """ for precision in range(base_precision, 20): levels = [_round_frac(b, precision) for b in bins] diff --git a/pandas/io/common.py b/pandas/io/common.py index c4772895afd1e..900e8489e8ac6 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -74,7 +74,8 @@ def is_url(url) -> bool: def _expand_user( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Return the argument with an initial component of ~ or ~user + """ + Return the argument with an initial component of ~ or ~user replaced by that user's home directory. Parameters @@ -103,7 +104,8 @@ def validate_header_arg(header) -> None: def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: - """Attempt to convert a path-like object to a string. + """ + Attempt to convert a path-like object to a string. Parameters ---------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 5ad56e30eeb39..980d1462e184d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -721,7 +721,8 @@ def _get_sheet_name(self, sheet_name): return sheet_name def _value_with_fmt(self, val): - """Convert numpy types to Python types for the Excel writers. + """ + Convert numpy types to Python types for the Excel writers. Parameters ---------- @@ -755,7 +756,8 @@ def _value_with_fmt(self, val): @classmethod def check_extension(cls, ext): - """checks that path's extension against the Writer's supported + """ + checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError.""" if ext.startswith("."): ext = ext[1:] diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index ec5f6fcb17ff8..7af776dc1a10f 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -64,7 +64,8 @@ def get_sheet_by_name(self, name: str): raise ValueError(f"sheet {name} not found") def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - """Parse an ODF Table into a list of lists + """ + Parse an ODF Table into a list of lists """ from odf.table import CoveredTableCell, TableCell, TableRow @@ -120,7 +121,8 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: return table def _get_row_repeat(self, row) -> int: - """Return number of times this row was repeated + """ + Return number of times this row was repeated Repeating an empty row appeared to be a common way of representing sparse rows in the table. """ @@ -134,7 +136,8 @@ def _get_column_repeat(self, cell) -> int: return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) def _is_empty_row(self, row) -> bool: - """Helper function to find empty rows + """ + Helper function to find empty rows """ for column in row.childNodes: if len(column.childNodes) > 0: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index be52523e486af..8ec27bb937297 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -474,7 +474,8 @@ def write_cells( class _OpenpyxlReader(_BaseExcelReader): def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None: - """Reader using openpyxl engine. + """ + Reader using openpyxl engine. Parameters ---------- diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 9d284c8031840..44ac2045fbca1 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -171,7 +171,8 @@ def _trim_excel_header(row): def _fill_mi_header(row, control_row): - """Forward fill blank entries in row but only inside the same parent index. + """ + Forward fill blank entries in row but only inside the same parent index. Used for creating headers in Multiindex. Parameters diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index be1b78eeb146e..9c60f4b78ae5a 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -9,7 +9,8 @@ class _XlrdReader(_BaseExcelReader): def __init__(self, filepath_or_buffer): - """Reader using xlrd engine. + """ + Reader using xlrd engine. Parameters ---------- @@ -57,7 +58,8 @@ def get_sheet_data(self, sheet, convert_float): epoch1904 = self.book.datemode def _parse_cell(cell_contents, cell_typ): - """converts the contents of the cell into a pandas + """ + converts the contents of the cell into a pandas appropriate object""" if cell_typ == XL_CELL_DATE: diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index d102a885cef0a..78efe77e9fe2d 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -80,7 +80,8 @@ def write_cells( def _style_to_xlwt( cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" ) -> str: - """helper which recursively generate an xlwt easy style string + """ + helper which recursively generate an xlwt easy style string for example: hstyle = {"font": {"bold": True}, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 28a069bc9fc1b..aac1df5dcd396 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -41,7 +41,8 @@ def __init__( class CSSToExcelConverter: - """A callable for converting CSS declarations to ExcelWriter styles + """ + A callable for converting CSS declarations to ExcelWriter styles Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), focusing on font styling, backgrounds, borders and alignment. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 149533bf0c238..efcc2d2777413 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1296,7 +1296,8 @@ def _value_formatter( float_format: Optional[float_format_type] = None, threshold: Optional[Union[float, int]] = None, ) -> Callable: - """Returns a function to be applied on each value to format it + """ + Returns a function to be applied on each value to format it """ # the float_format parameter supersedes self.float_format @@ -1842,7 +1843,8 @@ def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False) self.use_eng_prefix = use_eng_prefix def __call__(self, num: Union[int, float]) -> str: - """ Formats a number in engineering notation, appending a letter + """ + Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: >>> format_eng(0) # for self.accuracy = 0 @@ -1941,7 +1943,8 @@ def _binify(cols: List[int], line_width: int) -> List[int]: def get_level_lengths( levels: Any, sentinel: Union[bool, object, str] = "" ) -> List[Dict[int, int]]: - """For each index in each level the function returns lengths of indexes. + """ + For each index in each level the function returns lengths of indexes. Parameters ---------- diff --git a/pandas/io/html.py b/pandas/io/html.py index c676bfb1f0c74..b4cc6c735ba0a 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -602,7 +602,8 @@ def _build_doc(self): def _build_xpath_expr(attrs) -> str: - """Build an xpath expression to simulate bs4's ability to pass in kwargs to + """ + Build an xpath expression to simulate bs4's ability to pass in kwargs to search for attributes when using the lxml parser. Parameters diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8bc8470ae7658..fea088cf9f8d6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1457,7 +1457,8 @@ def _should_parse_dates(self, i): def _extract_multi_indexer_columns( self, header, index_names, col_names, passed_names=False ): - """ extract and return the names, index_names, col_names + """ + extract and return the names, index_names, col_names header is a list-of-lists returned from the parsers """ if len(header) < 2: return header[0], index_names, col_names, passed_names diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0e2b909d5cdc7..401d7b9440f00 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -569,7 +569,8 @@ def __getattr__(self, name: str): ) def __contains__(self, key: str) -> bool: - """ check for existence of this key + """ + check for existence of this key can match the exact pathname or the pathnm w/o the leading '/' """ node = self.get_node(key) @@ -1831,7 +1832,8 @@ def get_result(self, coordinates: bool = False): class IndexCol: - """ an index column description class + """ + an index column description class Parameters ---------- @@ -1999,7 +2001,8 @@ def __iter__(self): return iter(self.values) def maybe_set_size(self, min_itemsize=None): - """ maybe set a string col itemsize: + """ + maybe set a string col itemsize: min_itemsize can be an integer or a dict with this columns name with an integer size """ if _ensure_decoded(self.kind) == "string": @@ -2051,7 +2054,8 @@ def validate_attr(self, append: bool): ) def update_info(self, info): - """ set/update the info for this indexable with the key/value + """ + set/update the info for this indexable with the key/value if there is a conflict raise/warn as needed """ for key in self._info_fields: @@ -2140,7 +2144,8 @@ def set_attr(self): class DataCol(IndexCol): - """ a data holding column, by definition this is not indexable + """ + a data holding column, by definition this is not indexable Parameters ---------- @@ -2460,7 +2465,8 @@ class GenericDataIndexableCol(DataIndexableCol): class Fixed: - """ represent an object in my store + """ + represent an object in my store facilitate read/write of various types of objects this is an abstract base class @@ -2596,7 +2602,8 @@ def validate_version(self, where=None): return True def infer_axes(self): - """ infer the axes of my storer + """ + infer the axes of my storer return a boolean indicating if we have a valid storer or not """ s = self.storable @@ -3105,7 +3112,8 @@ class FrameFixed(BlockManagerFixed): class Table(Fixed): - """ represent a table: + """ + represent a table: facilitate read/write of various types of tables Attrs in Table Node @@ -3229,7 +3237,8 @@ def is_multi_index(self) -> bool: return isinstance(self.levels, list) def validate_multiindex(self, obj): - """validate that we can store the multi-index; reset and return the + """ + validate that we can store the multi-index; reset and return the new object """ levels = [ @@ -3381,7 +3390,8 @@ def validate_version(self, where=None): warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): - """validate the min_itemsize doesn't contain items that are not in the + """ + validate the min_itemsize doesn't contain items that are not in the axes this needs data_columns to be defined """ if min_itemsize is None: @@ -3595,7 +3605,8 @@ def get_object(cls, obj, transposed: bool): return obj def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): - """take the input data_columns and min_itemize and create a data + """ + take the input data_columns and min_itemize and create a data columns spec """ @@ -4011,7 +4022,8 @@ def create_description( def read_coordinates( self, where=None, start: Optional[int] = None, stop: Optional[int] = None, ): - """select coordinates (row numbers) from a table; return the + """ + select coordinates (row numbers) from a table; return the coordinates object """ @@ -4041,7 +4053,8 @@ def read_column( start: Optional[int] = None, stop: Optional[int] = None, ): - """return a single column from the table, generally only indexables + """ + return a single column from the table, generally only indexables are interesting """ @@ -4080,7 +4093,8 @@ def read_column( class WORMTable(Table): - """ a write-once read-many table: this format DOES NOT ALLOW appending to a + """ + a write-once read-many table: this format DOES NOT ALLOW appending to a table. writing is a one-time operation the data are stored in a format that allows for searching the data on disk """ @@ -4094,12 +4108,14 @@ def read( start: Optional[int] = None, stop: Optional[int] = None, ): - """ read the indices and the indexing array, calculate offset rows and + """ + read the indices and the indexing array, calculate offset rows and return """ raise NotImplementedError("WORMTable needs to implement read") def write(self, **kwargs): - """ write in a format that we can search later on (but cannot append + """ + write in a format that we can search later on (but cannot append to): write out the indices and the values using _write_array (e.g. a CArray) create an indexing table so that we can search """ @@ -4170,7 +4186,8 @@ def write( table.write_data(chunksize, dropna=dropna) def write_data(self, chunksize: Optional[int], dropna: bool = False): - """ we form the data into a 2-d including indexes,values,mask + """ + we form the data into a 2-d including indexes,values,mask write chunk-by-chunk """ names = self.dtype.names diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 9b40778dbcfdf..9937bd8c7290b 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -120,7 +120,8 @@ def column_data_offsets(self): return np.asarray(self._column_data_offsets, dtype=np.int64) def column_types(self): - """Returns a numpy character array of the column types: + """ + Returns a numpy character array of the column types: s (string) or d (double)""" return np.asarray(self._column_types, dtype=np.dtype("S1")) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 58fed0d18dd4a..af25861056625 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -655,7 +655,8 @@ def create(self): self._execute_create() def _execute_insert(self, conn, keys, data_iter): - """Execute SQL statement inserting data + """ + Execute SQL statement inserting data Parameters ---------- @@ -669,7 +670,8 @@ def _execute_insert(self, conn, keys, data_iter): conn.execute(self.table.insert(), data) def _execute_insert_multi(self, conn, keys, data_iter): - """Alternative to _execute_insert for DBs support multivalue INSERT. + """ + Alternative to _execute_insert for DBs support multivalue INSERT. Note: multi-value insert is usually faster for analytics DBs and tables containing a few columns @@ -1095,7 +1097,8 @@ def read_table( schema=None, chunksize=None, ): - """Read SQL database table into a DataFrame. + """ + Read SQL database table into a DataFrame. Parameters ---------- @@ -1172,7 +1175,8 @@ def read_query( params=None, chunksize=None, ): - """Read SQL query into a DataFrame. + """ + Read SQL query into a DataFrame. Parameters ---------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index d651fe9f67773..051dfd270f6ce 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -482,7 +482,8 @@ class InvalidColumnName(Warning): def _cast_to_stata_types(data: DataFrame) -> DataFrame: - """Checks the dtypes of the columns of a pandas DataFrame for + """ + Checks the dtypes of the columns of a pandas DataFrame for compatibility with the data types and ranges supported by Stata, and converts if necessary. @@ -2132,7 +2133,8 @@ def _write_bytes(self, value: bytes) -> None: self._file.write(value) def _prepare_categoricals(self, data: DataFrame) -> DataFrame: - """Check for categorical columns, retain categorical information for + """ + Check for categorical columns, retain categorical information for Stata file and convert categorical data to int""" is_cat = [is_categorical_dtype(data[col]) for col in data] @@ -2174,7 +2176,8 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: def _replace_nans(self, data: DataFrame) -> DataFrame: # return data - """Checks floating point data columns for nans, and replaces these with + """ + Checks floating point data columns for nans, and replaces these with the generic Stata for missing value (.)""" for c in data: dtype = data[c].dtype @@ -3039,7 +3042,8 @@ def _write_header( self._write_bytes(self._tag(bio.read(), "header")) def _write_map(self) -> None: - """Called twice during file write. The first populates the values in + """ + Called twice during file write. The first populates the values in the map with 0s. The second call writes the final map locations when all blocks have been written.""" assert self._file is not None @@ -3188,7 +3192,8 @@ def _write_file_close_tag(self) -> None: self._update_map("end-of-file") def _update_strl_names(self) -> None: - """Update column names for conversion to strl if they might have been + """ + Update column names for conversion to strl if they might have been changed to comply with Stata naming rules""" # Update convert_strl if names changed for orig, new in self._converted_names.items(): @@ -3197,7 +3202,8 @@ def _update_strl_names(self) -> None: self._convert_strl[idx] = new def _convert_strls(self, data: DataFrame) -> DataFrame: - """Convert columns to StrLs if either very large or in the + """ + Convert columns to StrLs if either very large or in the convert_strl variable""" convert_cols = [ col diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index de09460bb833d..5dfabf21115dc 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -509,7 +509,8 @@ def _adorn_subplots(self): self.axes[0].set_title(self.title) def _apply_axis_properties(self, axis, rot=None, fontsize=None): - """ Tick creation within matplotlib is reasonably expensive and is + """ + Tick creation within matplotlib is reasonably expensive and is internally deferred until accessed as Ticks are created/destroyed multiple times per draw. It's therefore beneficial for us to avoid accessing unless we will act on the Tick. diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index d7732c86911b8..b77bd3b35b0f3 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -100,7 +100,8 @@ def _subplots( layout_type="box", **fig_kw, ): - """Create a figure with a set of subplots already made. + """ + Create a figure with a set of subplots already made. This utility wrapper makes it convenient to create common layouts of subplots, including the enclosing figure object, in a single call. diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index b67ca4cfab83d..cd4b43c83340f 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -1,4 +1,5 @@ -"""Rudimentary Apache Arrow-backed ExtensionArray. +""" +Rudimentary Apache Arrow-backed ExtensionArray. At the moment, just a boolean array / type is implemented. Eventually, we'll want to parametrize the type and support diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index e2b6ea0304f6a..323cb843b2d74 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -1,4 +1,5 @@ -"""Base test suite for extension arrays. +""" +Base test suite for extension arrays. These tests are intended for third-party libraries to subclass to validate that their extension arrays and dtypes satisfy the interface. Moving or diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 0609f19c8e0c3..4009041218ac2 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -51,7 +51,8 @@ def _check_divmod_op(self, s, op, other, exc=Exception): class BaseArithmeticOpsTests(BaseOpsUtil): - """Various Series and DataFrame arithmetic ops methods. + """ + Various Series and DataFrame arithmetic ops methods. Subclasses supporting various ops should set the class variables to indicate that they support ops of that kind diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index d37638d37e4d6..1942d737780da 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -13,7 +13,8 @@ def dtype(): @pytest.fixture def data(): - """Length-100 array for this type. + """ + Length-100 array for this type. * data[0] and data[1] should both be non missing * data[0] and data[1] should not be equal @@ -67,7 +68,8 @@ def gen(count): @pytest.fixture def data_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, C, A] with A < B < C @@ -77,7 +79,8 @@ def data_for_sorting(): @pytest.fixture def data_missing_for_sorting(): - """Length-3 array with a known sort order. + """ + Length-3 array with a known sort order. This should be three items [B, NA, A] with A < B and NA missing. @@ -87,7 +90,8 @@ def data_missing_for_sorting(): @pytest.fixture def na_cmp(): - """Binary operator for comparing NA values. + """ + Binary operator for comparing NA values. Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. @@ -105,7 +109,8 @@ def na_value(): @pytest.fixture def data_for_grouping(): - """Data for factorization, grouping, and unique tests. + """ + Data for factorization, grouping, and unique tests. Expected to be like [B, B, NA, NA, A, A, B, C] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 1ba1b872fa5e2..0b30aa1689751 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -1,4 +1,5 @@ -"""Test extension array for storing nested data in a pandas container. +""" +Test extension array for storing nested data in a pandas container. The JSONArray stores lists of dictionaries. The storage mechanism is a list, not an ndarray. diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index ebac36c5f8c78..1214734358c80 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -107,7 +107,8 @@ def three_group(): @pytest.fixture(params=sorted(reduction_kernels)) def reduction_func(request): - """yields the string names of all groupby reduction functions, one at a time. + """ + yields the string names of all groupby reduction functions, one at a time. """ return request.param diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 4804172a22529..c5c96ea8769d1 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -101,7 +101,8 @@ def setup_method(self, method): setattr(self, kind, d) def generate_indices(self, f, values=False): - """ generate the indices + """ + generate the indices if values is True , use the axis values is False, use the range """ diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py index e6d5a9eb84410..54ebd8cd93956 100644 --- a/pandas/tests/indexing/multiindex/conftest.py +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -20,7 +20,8 @@ def multiindex_dataframe_random_data(): @pytest.fixture def multiindex_year_month_day_dataframe_random_data(): - """DataFrame with 3 level MultiIndex (year, month, day) covering + """ + DataFrame with 3 level MultiIndex (year, month, day) covering first 100 business days from 2000-01-01 with random data""" tdf = tm.makeTimeDataFrame(100) ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 7810778602e12..fe71ca77a7dda 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -27,7 +27,8 @@ def salaries_table(datapath): @pytest.fixture def s3_resource(tips_file, jsonl_file): - """Fixture for mocking S3 interaction. + """ + Fixture for mocking S3 interaction. The primary bucket name is "pandas-test". The following datasets are loaded. diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py index d06f467760518..c5529d77dece5 100644 --- a/pandas/tests/io/pytables/common.py +++ b/pandas/tests/io/pytables/common.py @@ -74,7 +74,8 @@ def ensure_clean_path(path): def _maybe_remove(store, key): - """For tests using tables, try removing the table to be sure there is + """ + For tests using tables, try removing the table to be sure there is no content from previous tests using the same table name.""" try: store.remove(key) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index bb4f7ced3350f..833c022944b20 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -98,14 +98,16 @@ def _index_name(): @pytest.fixture def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): - """Fixture for parametrization of date_range, period_range and + """ + Fixture for parametrization of date_range, period_range and timedelta_range indexes""" return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name) @pytest.fixture def _static_values(index): - """Fixture for parametrization of values used in parametrization of + """ + Fixture for parametrization of values used in parametrization of Series and DataFrames with date_range, period_range and timedelta_range indexes""" return np.arange(len(index)) @@ -113,28 +115,32 @@ def _static_values(index): @pytest.fixture def _series_name(): - """Fixture for parametrization of Series name for Series used with + """ + Fixture for parametrization of Series name for Series used with date_range, period_range and timedelta_range indexes""" return None @pytest.fixture def series(index, _series_name, _static_values): - """Fixture for parametrization of Series with date_range, period_range and + """ + Fixture for parametrization of Series with date_range, period_range and timedelta_range indexes""" return Series(_static_values, index=index, name=_series_name) @pytest.fixture def empty_series(series): - """Fixture for parametrization of empty Series with date_range, + """ + Fixture for parametrization of empty Series with date_range, period_range and timedelta_range indexes""" return series[:0] @pytest.fixture def frame(index, _series_name, _static_values): - """Fixture for parametrization of DataFrame with date_range, period_range + """ + Fixture for parametrization of DataFrame with date_range, period_range and timedelta_range indexes""" # _series_name is intentionally unused return DataFrame({"value": _static_values}, index=index) @@ -142,7 +148,8 @@ def frame(index, _series_name, _static_values): @pytest.fixture def empty_frame(series): - """Fixture for parametrization of empty DataFrame with date_range, + """ + Fixture for parametrization of empty DataFrame with date_range, period_range and timedelta_range indexes""" index = series.index[:0] return DataFrame(index=index) @@ -150,7 +157,8 @@ def empty_frame(series): @pytest.fixture(params=[Series, DataFrame]) def series_and_frame(request, series, frame): - """Fixture for parametrization of Series and DataFrame with date_range, + """ + Fixture for parametrization of Series and DataFrame with date_range, period_range and timedelta_range indexes""" if request.param == Series: return series diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index a715094e65e98..3e53bab4b3d20 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -215,7 +215,8 @@ def validate_bool_kwarg(value, arg_name): def validate_axis_style_args(data, args, kwargs, arg_name, method_name): - """Argument handler for mixed index, columns / axis functions + """ + Argument handler for mixed index, columns / axis functions In an attempt to handle both `.method(index, columns)`, and `.method(arg, axis=.)`, we have to do some bad things to argument @@ -309,7 +310,8 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): - """Validate the keyword arguments to 'fillna'. + """ + Validate the keyword arguments to 'fillna'. This checks that exactly one of 'value' and 'method' is specified. If 'method' is specified, this validates that it's a valid method. From fda90aff095ac9f325e0c10bf3bd92b8cce51910 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 11 Feb 2020 21:44:48 +0000 Subject: [PATCH 2/4] remove # noqa --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6701396ea10bd..5888600d2fa8e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -819,7 +819,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): # noqa + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. From 4b479e242d59cc3a8f3bb5d5a6b9f860d6502742 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Feb 2020 07:15:50 +0000 Subject: [PATCH 3/4] tidy up --- pandas/core/generic.py | 3 ++- pandas/io/common.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9bc92d792f994..389d05d310f75 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3133,7 +3133,8 @@ def _set_as_cached(self, item, cacher) -> None: def _reset_cacher(self) -> None: """ - Reset the cacher.""" + Reset the cacher. + """ if hasattr(self, "_cacher"): del self._cacher diff --git a/pandas/io/common.py b/pandas/io/common.py index 900e8489e8ac6..3c26a282815bd 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -76,7 +76,7 @@ def _expand_user( ) -> FilePathOrBuffer[AnyStr]: """ Return the argument with an initial component of ~ or ~user - replaced by that user's home directory. + replaced by that user's home directory. Parameters ---------- From 955c1e3f035273b8d7cdf42140c7378765e6389e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 12 Feb 2020 07:20:48 +0000 Subject: [PATCH 4/4] $ pydocstyle.exe pandas --select=D207,D208,D209 --- pandas/_testing.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index a47cb4adae080..1647449ae8651 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1980,30 +1980,30 @@ def makeCustomDataframe( r_idx_type=None, ): """ - nrows, ncols - number of data rows/cols - c_idx_names, idx_names - False/True/list of strings, yields No names , + nrows, ncols - number of data rows/cols + c_idx_names, idx_names - False/True/list of strings, yields No names , default names or uses the provided names for the levels of the corresponding index. You can provide a single string when c_idx_nlevels ==1. - c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex - r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex - data_gen_f - a function f(row,col) which return the data value + c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex + r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex + data_gen_f - a function f(row,col) which return the data value at that position, the default generator used yields values of the form "RxCy" based on position. - c_ndupe_l, r_ndupe_l - list of integers, determines the number + c_ndupe_l, r_ndupe_l - list of integers, determines the number of duplicates for each label at a given level of the corresponding index. The default `None` value produces a multiplicity of 1 across all levels, i.e. a unique index. Will accept a partial list of length N < idx_nlevels, for just the first N levels. If ndupe doesn't divide nrows/ncol, the last label might have lower multiplicity. - dtype - passed to the DataFrame constructor as is, in case you wish to + dtype - passed to the DataFrame constructor as is, in case you wish to have more control in conjunction with a custom `data_gen_f` - r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". - If idx_type is not None, `idx_nlevels` must be 1. - "i"/"f" creates an integer/float index, - "s"/"u" creates a string/unicode index - "dt" create a datetime index. - "td" create a timedelta index. + r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s"/"u" creates a string/unicode index + "dt" create a datetime index. + "td" create a timedelta index. if unspecified, string labels will be generated.