diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2d78ce7db8090..0b9aae6676710 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,7 +122,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check for non-standard imports' ; echo $MSG invgrep -R --include="*.py*" -E "from pandas.core.common import " pandas invgrep -R --include="*.py*" -E "from collections.abc import " pandas - # invgrep -R --include="*.py*" -E "from numpy import nan " pandas # GH#24822 not yet implemented since the offending imports have not all been removed + invgrep -R --include="*.py*" -E "from numpy import nan " pandas RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check for use of exec' ; echo $MSG diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 121c61d8d3623..35f30fb160a9c 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -287,7 +287,7 @@ def get_empty_dtype_and_na(join_units): return np.float64, np.nan if is_uniform_reindex(join_units): - # XXX: integrate property + # FIXME: integrate property empty_dtype = join_units[0].block.dtype upcasted_na = join_units[0].block.fill_value return empty_dtype, upcasted_na diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1c31542daa5de..5f4c9d41b340b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2035,7 +2035,7 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): values = b.values if copy: values = values.copy() - elif not copy: + else: values = values.view() b = b.make_block_same_class(values, placement=placement) elif is_uniform_join_units(join_units): diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 42f1d4e99108f..398fa9b0c1fc0 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -766,9 +766,9 @@ def f(self, other, axis=default_axis, level=None): return f -def _comp_method_FRAME(cls, func, special): - 
str_rep = _get_opstr(func) - op_name = _get_op_name(func, special) +def _comp_method_FRAME(cls, op, special): + str_rep = _get_opstr(op) + op_name = _get_op_name(op, special) @Appender("Wrapper for comparison method {name}".format(name=op_name)) def f(self, other): @@ -781,18 +781,18 @@ def f(self, other): raise ValueError( "Can only compare identically-labeled DataFrame objects" ) - new_data = dispatch_to_series(self, other, func, str_rep) + new_data = dispatch_to_series(self, other, op, str_rep) return self._construct_result(new_data) elif isinstance(other, ABCSeries): return _combine_series_frame( - self, other, func, fill_value=None, axis=None, level=None + self, other, op, fill_value=None, axis=None, level=None ) else: # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. - new_data = dispatch_to_series(self, other, func) + new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) f.__name__ = op_name diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 55b4b1a899f65..a225eec93b27e 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -118,14 +118,14 @@ def masked_arith_op(x, y, op): return result -def define_na_arithmetic_op(op, str_rep, eval_kwargs): +def define_na_arithmetic_op(op, str_rep: str, eval_kwargs): def na_op(x, y): return na_arithmetic_op(x, y, op, str_rep, eval_kwargs) return na_op -def na_arithmetic_op(left, right, op, str_rep, eval_kwargs): +def na_arithmetic_op(left, right, op, str_rep: str, eval_kwargs): """ Return the result of evaluating op on the passed in values. @@ -173,6 +173,7 @@ def arithmetic_op( Cannot be a DataFrame or Index. Series is *not* excluded. op : {operator.add, operator.sub, ...} Or one of the reversed variants from roperator. 
+ str_rep : str Returns ------- @@ -279,8 +280,16 @@ def comparison_op( return res_values -def na_logical_op(x, y, op): +def na_logical_op(x: np.ndarray, y, op): try: + # For exposition, write: + # yarr = isinstance(y, np.ndarray) + # yint = is_integer(y) or (yarr and y.dtype.kind == "i") + # ybool = is_bool(y) or (yarr and y.dtype.kind == "b") + # xint = x.dtype.kind == "i" + # xbool = x.dtype.kind == "b" + # Then cases where this goes through without raising include: + # (xint or xbool) and (yint or ybool) result = op(x, y) except TypeError: if isinstance(y, np.ndarray): @@ -304,9 +313,9 @@ def na_logical_op(x, y, op): NotImplementedError, ): raise TypeError( - "cannot compare a dtyped [{dtype}] array " - "with a scalar of type [{typ}]".format( - dtype=x.dtype, typ=type(y).__name__ + "Cannot perform '{op}' with a dtyped [{dtype}] array " + "and scalar of type [{typ}]".format( + op=op.__name__, dtype=x.dtype, typ=type(y).__name__ ) ) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index b8f9ecd42bae3..8da03a7f61029 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -1,5 +1,4 @@ import numpy as np -from numpy import nan import pytest from pandas._libs import groupby, lib, reduction as libreduction @@ -96,7 +95,7 @@ def _check(dtype): def _ohlc(group): if isna(group).all(): - return np.repeat(nan, 4) + return np.repeat(np.nan, 4) return [group[0], group.max(), group.min(), group[-1]] expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) @@ -104,9 +103,9 @@ def _ohlc(group): assert_almost_equal(out, expected) tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) - obj[:6] = nan + obj[:6] = np.nan func(out, counts, obj[:, None], labels) - expected[0] = nan + expected[0] = np.nan assert_almost_equal(out, expected) _check("float32") diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 
e1e35d8eb7d18..7acddec002d98 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -4,7 +4,6 @@ from io import StringIO import numpy as np -from numpy import nan import pytest import pytz @@ -699,13 +698,13 @@ def test_first_last_max_min_on_time_data(self): df_test = DataFrame( { "dt": [ - nan, + np.nan, "2015-07-24 10:10", "2015-07-25 11:11", "2015-07-23 12:12", - nan, + np.nan, ], - "td": [nan, td(days=1), td(days=2), td(days=3), nan], + "td": [np.nan, td(days=1), td(days=2), td(days=3), np.nan], } ) df_test.dt = pd.to_datetime(df_test.dt) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 8ad09549f3cbe..9feec424389e7 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -4,7 +4,6 @@ import os import numpy as np -from numpy import nan import pytest from pandas.compat import PY36 @@ -323,7 +322,7 @@ def test_excel_writer_context_manager(self, frame, engine, ext): def test_roundtrip(self, engine, ext, frame): frame = frame.copy() - frame["A"][:5] = nan + frame["A"][:5] = np.nan frame.to_excel(self.path, "test1") frame.to_excel(self.path, "test1", columns=["A", "B"]) @@ -388,7 +387,7 @@ def test_ts_frame(self, tsframe, engine, ext): def test_basics_with_nan(self, engine, ext, frame): frame = frame.copy() - frame["A"][:5] = nan + frame["A"][:5] = np.nan frame.to_excel(self.path, "test1") frame.to_excel(self.path, "test1", columns=["A", "B"]) frame.to_excel(self.path, "test1", header=False) @@ -450,7 +449,7 @@ def test_inf_roundtrip(self, engine, ext): def test_sheets(self, engine, ext, frame, tsframe): frame = frame.copy() - frame["A"][:5] = nan + frame["A"][:5] = np.nan frame.to_excel(self.path, "test1") frame.to_excel(self.path, "test1", columns=["A", "B"]) @@ -473,7 +472,7 @@ def test_sheets(self, engine, ext, frame, tsframe): def test_colaliases(self, engine, ext, frame): frame = frame.copy() - frame["A"][:5] = nan + 
frame["A"][:5] = np.nan frame.to_excel(self.path, "test1") frame.to_excel(self.path, "test1", columns=["A", "B"]) @@ -491,7 +490,7 @@ def test_colaliases(self, engine, ext, frame): def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): frame = frame.copy() - frame["A"][:5] = nan + frame["A"][:5] = np.nan frame.to_excel(self.path, "test1") frame.to_excel(self.path, "test1", columns=["A", "B"]) diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 73638fe8ab7c8..9afeaf75f4da3 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -6,7 +6,6 @@ import os import numpy as np -from numpy import nan import pytest import pandas._libs.parsers as parser @@ -309,10 +308,15 @@ def test_empty_field_eof(self): assert_array_dicts_equal(result, expected) # GH5664 - a = DataFrame([["b"], [nan]], columns=["a"], index=["a", "c"]) + a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"]) b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1]) c = DataFrame( - [[1, 2, 3, 4], [6, nan, nan, nan], [8, 9, 10, 11], [13, 14, nan, nan]], + [ + [1, 2, 3, 4], + [6, np.nan, np.nan, np.nan], + [8, 9, 10, 11], + [13, 14, np.nan, np.nan], + ], columns=list("abcd"), index=[0, 5, 7, 12], ) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4de8bba169438..08698133e360d 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,10 +1,9 @@ from collections import OrderedDict -from datetime import date, datetime +from datetime import date, datetime, timedelta import random import re import numpy as np -from numpy import nan import pytest from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype @@ -565,9 +564,7 @@ def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): assert_frame_equal(actual, expected) def 
test_merge_nosort(self): - # #2098, anything to do? - - from datetime import datetime + # GH#2098, TODO: anything to do? d = { "var1": np.random.randint(0, 10, size=10), @@ -621,9 +618,9 @@ def test_merge_nan_right(self): expected = DataFrame( { "i1": {0: 0, 1: 1}, - "i1_": {0: 0.0, 1: nan}, + "i1_": {0: 0.0, 1: np.nan}, "i2": {0: 0.5, 1: 1.5}, - "i3": {0: 0.69999999999999996, 1: nan}, + "i3": {0: 0.69999999999999996, 1: np.nan}, } )[["i1", "i2", "i1_", "i3"]] assert_frame_equal(result, expected) @@ -640,21 +637,17 @@ def _constructor(self): assert isinstance(result, NotADataFrame) def test_join_append_timedeltas(self): - - import datetime as dt - from pandas import NaT - # timedelta64 issues with join/merge # GH 5695 - d = {"d": dt.datetime(2013, 11, 5, 5, 56), "t": dt.timedelta(0, 22500)} + d = {"d": datetime(2013, 11, 5, 5, 56), "t": timedelta(0, 22500)} df = DataFrame(columns=list("dt")) df = df.append(d, ignore_index=True) result = df.append(d, ignore_index=True) expected = DataFrame( { - "d": [dt.datetime(2013, 11, 5, 5, 56), dt.datetime(2013, 11, 5, 5, 56)], - "t": [dt.timedelta(0, 22500), dt.timedelta(0, 22500)], + "d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)], + "t": [timedelta(0, 22500), timedelta(0, 22500)], } ) assert_frame_equal(result, expected) @@ -667,7 +660,7 @@ def test_join_append_timedeltas(self): expected = DataFrame( { "0": Series([td, td], index=list("AB")), - "0r": Series([td, NaT], index=list("AB")), + "0r": Series([td, pd.NaT], index=list("AB")), } ) assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 2b79548be7b59..a9f23313a83b9 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -1,4 +1,4 @@ -from numpy import nan +import numpy as np import pytest import pandas as pd @@ -17,8 +17,8 @@ def test_basic(self): expected = DataFrame( { "key": ["a", "b", 
"c", "d", "e", "f"], - "lvalue": [1, nan, 2, nan, 3, nan], - "rvalue": [nan, 1, 2, 3, nan, 4], + "lvalue": [1, np.nan, 2, np.nan, 3, np.nan], + "rvalue": [np.nan, 1, 2, 3, np.nan, 4], } ) @@ -30,7 +30,7 @@ def test_ffill(self): { "key": ["a", "b", "c", "d", "e", "f"], "lvalue": [1.0, 1, 2, 2, 3, 3.0], - "rvalue": [nan, 1, 2, 3, 3, 4], + "rvalue": [np.nan, 1, 2, 3, 3, 4], } ) assert_frame_equal(result, expected) @@ -47,7 +47,7 @@ def test_multigroup(self): { "key": ["a", "b", "c", "d", "e", "f"] * 2, "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2, - "rvalue": [nan, 1, 2, 3, 3, 4] * 2, + "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2, } ) expected["group"] = ["a"] * 6 + ["b"] * 6 @@ -110,7 +110,7 @@ def test_doc_example(self): "group": list("aaaaabbbbb"), "key": ["a", "b", "c", "d", "e"] * 2, "lvalue": [1, 1, 2, 2, 3] * 2, - "rvalue": [nan, 1, 2, 3, 3] * 2, + "rvalue": [np.nan, 1, 2, 3, 3] * 2, } ) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 7aea85153d908..1d8d2add3840c 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -1,7 +1,6 @@ from collections import OrderedDict import numpy as np -from numpy import nan from numpy.random import randn import pytest @@ -311,11 +310,11 @@ def test_left_join_index_multi_match_multiindex(self): [ ["X", "Y", "C", "a", 6], ["X", "Y", "C", "a", 9], - ["W", "Y", "C", "e", nan], + ["W", "Y", "C", "e", np.nan], ["V", "Q", "A", "h", -3], ["V", "R", "D", "i", 2], ["V", "R", "D", "i", -1], - ["X", "Y", "D", "b", nan], + ["X", "Y", "D", "b", np.nan], ["X", "Y", "A", "c", 1], ["X", "Y", "A", "c", 4], ["W", "Q", "B", "f", 3], @@ -365,10 +364,10 @@ def test_left_join_index_multi_match(self): ["c", 0, "x"], ["c", 0, "r"], ["c", 0, "s"], - ["b", 1, nan], + ["b", 1, np.nan], ["a", 2, "v"], ["a", 2, "z"], - ["b", 3, nan], + ["b", 3, np.nan], ], columns=["tag", "val", "char"], index=[2, 2, 2, 2, 0, 1, 1, 3], diff --git a/pandas/tests/reshape/test_melt.py 
b/pandas/tests/reshape/test_melt.py index 5b1f151daf219..b1d790644bbfb 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -1,5 +1,4 @@ import numpy as np -from numpy import nan import pytest import pandas as pd @@ -329,11 +328,11 @@ def test_pairs(self): "29dec2008", "20jan2009", ], - "visitdt2": ["21jan2009", nan, "22jan2009", "31dec2008", "03feb2009"], - "visitdt3": ["05feb2009", nan, nan, "02jan2009", "15feb2009"], + "visitdt2": ["21jan2009", np.nan, "22jan2009", "31dec2008", "03feb2009"], + "visitdt3": ["05feb2009", np.nan, np.nan, "02jan2009", "15feb2009"], "wt1": [1823, 3338, 1549, 3298, 4306], - "wt2": [2011.0, nan, 1892.0, 3338.0, 4575.0], - "wt3": [2293.0, nan, nan, 3377.0, 4805.0], + "wt2": [2011.0, np.nan, 1892.0, 3338.0, 4575.0], + "wt3": [2293.0, np.nan, np.nan, 3377.0, 4805.0], } df = DataFrame(data) @@ -497,13 +496,13 @@ def test_pairs(self): "29dec2008", "20jan2009", "21jan2009", - nan, + np.nan, "22jan2009", "31dec2008", "03feb2009", "05feb2009", - nan, - nan, + np.nan, + np.nan, "02jan2009", "15feb2009", ], @@ -514,13 +513,13 @@ def test_pairs(self): 3298.0, 4306.0, 2011.0, - nan, + np.nan, 1892.0, 3338.0, 4575.0, 2293.0, - nan, - nan, + np.nan, + np.nan, 3377.0, 4805.0, ], diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 5e80c317a587b..e2c6f7d1c8feb 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -1,7 +1,6 @@ from collections import OrderedDict import numpy as np -from numpy import nan import pytest from pandas.core.dtypes.common import is_integer_dtype @@ -140,19 +139,19 @@ def test_include_na(self, sparse, dtype): # Sparse dataframes do not allow nan labelled columns, see #GH8822 res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype) exp_na = DataFrame( - {nan: [0, 0, 1], "a": [1, 0, 0], "b": [0, 1, 0]}, + {np.nan: [0, 0, 1], "a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype), ) - 
exp_na = exp_na.reindex(["a", "b", nan], axis=1) + exp_na = exp_na.reindex(["a", "b", np.nan], axis=1) # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns if sparse: exp_na = exp_na.apply(pd.SparseArray, fill_value=0.0) assert_frame_equal(res_na, exp_na) - res_just_na = get_dummies([nan], dummy_na=True, sparse=sparse, dtype=dtype) + res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) exp_just_na = DataFrame( - Series(1, index=[0]), columns=[nan], dtype=self.effective_dtype(dtype) + Series(1, index=[0]), columns=[np.nan], dtype=self.effective_dtype(dtype) ) tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) @@ -464,14 +463,16 @@ def test_basic_drop_first_NA(self, sparse): assert_frame_equal(res, exp) res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse) - exp_na = DataFrame({"b": [0, 1, 0], nan: [0, 0, 1]}, dtype=np.uint8).reindex( - ["b", nan], axis=1 + exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex( + ["b", np.nan], axis=1 ) if sparse: exp_na = exp_na.apply(pd.SparseArray, fill_value=0) assert_frame_equal(res_na, exp_na) - res_just_na = get_dummies([nan], dummy_na=True, drop_first=True, sparse=sparse) + res_just_na = get_dummies( + [np.nan], dummy_na=True, drop_first=True, sparse=sparse + ) exp_just_na = DataFrame(index=np.arange(1)) assert_frame_equal(res_just_na, exp_just_na) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index c93a000f5e7ce..b25fee0435da0 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -1,7 +1,6 @@ from datetime import datetime import numpy as np -from numpy import nan import pytest import pandas as pd @@ -195,9 +194,9 @@ def test_reindex(test_data): def test_reindex_nan(): - ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8]) + ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8]) - i, j = [nan, 
1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2] + i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2] assert_series_equal(ts.reindex(i), ts.iloc[j]) ts.index = ts.index.astype("object") diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 08aa3ad02e0ed..d60cd3029e5a8 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -2,7 +2,6 @@ import operator import numpy as np -from numpy import nan import pytest import pandas.util._test_decorators as td @@ -236,7 +235,7 @@ def test_np_diff(self): s = Series(np.arange(5)) r = np.diff(s) - assert_series_equal(Series([nan, 0, 0, 0, nan]), r) + assert_series_equal(Series([np.nan, 0, 0, 0, np.nan]), r) def test_int_diff(self): # int dtype @@ -283,7 +282,7 @@ def test_tz_diff(self): @pytest.mark.parametrize( "input,output,diff", - [([False, True, True, False, False], [nan, True, False, True, False], 1)], + [([False, True, True, False, False], [np.nan, True, False, True, False], 1)], ) def test_bool_diff(self, input, output, diff): # boolean series (test for fixing #17294) @@ -294,7 +293,7 @@ def test_bool_diff(self, input, output, diff): def test_obj_diff(self): # object series - s = Series([False, True, 5.0, nan, True, False]) + s = Series([False, True, 5.0, np.nan, True, False]) result = s.diff() expected = s - s.shift(1) assert_series_equal(result, expected) @@ -538,14 +537,14 @@ def test_count(self, datetime_series): assert datetime_series.count() == np.isfinite(datetime_series).sum() - mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, nan, 1, 2]]) + mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) left = ts.count(level=1) - right = Series([2, 3, 1], index=[1, 2, nan]) + right = Series([2, 3, 1], index=[1, 2, np.nan]) assert_series_equal(left, right) - ts.iloc[[0, 3, 5]] = nan + ts.iloc[[0, 3, 5]] = np.nan assert_series_equal(ts.count(level=1), right - 1) def 
test_dot(self): @@ -770,11 +769,11 @@ def test_cummethods_bool(self): result = getattr(s, method)() assert_series_equal(result, expected) - e = pd.Series([False, True, nan, False]) - cse = pd.Series([0, 1, nan, 1], dtype=object) - cpe = pd.Series([False, 0, nan, 0]) - cmin = pd.Series([False, False, nan, False]) - cmax = pd.Series([False, True, nan, True]) + e = pd.Series([False, True, np.nan, False]) + cse = pd.Series([0, 1, np.nan, 1], dtype=object) + cpe = pd.Series([False, 0, np.nan, 0]) + cmin = pd.Series([False, False, np.nan, False]) + cmax = pd.Series([False, True, np.nan, True]) expecteds = {"cumsum": cse, "cumprod": cpe, "cummin": cmin, "cummax": cmax} for method in methods: @@ -1042,7 +1041,6 @@ def test_shift_categorical(self): assert_index_equal(s.values.categories, sn2.values.categories) def test_unstack(self): - from numpy import nan index = MultiIndex( levels=[["bar", "foo"], ["one", "three", "two"]], @@ -1053,7 +1051,7 @@ def test_unstack(self): unstacked = s.unstack() expected = DataFrame( - [[2.0, nan, 3.0], [0.0, 1.0, nan]], + [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], index=["bar", "foo"], columns=["one", "three", "two"], ) @@ -1080,7 +1078,9 @@ def test_unstack(self): idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) ts = pd.Series([1, 2], index=idx) left = ts.unstack() - right = DataFrame([[nan, 1], [2, nan]], index=[101, 102], columns=[nan, 3.5]) + right = DataFrame( + [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] + ) assert_frame_equal(left, right) idx = pd.MultiIndex.from_arrays( @@ -1092,9 +1092,10 @@ def test_unstack(self): ) ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) right = DataFrame( - [[1.0, 1.3], [1.1, nan], [nan, 1.4], [1.2, nan]], columns=["cat", "dog"] + [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], + columns=["cat", "dog"], ) - tpls = [("a", 1), ("a", 2), ("b", nan), ("b", 1)] + tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] right.index = 
pd.MultiIndex.from_tuples(tpls) assert_frame_equal(ts.unstack(level=0), right) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 819b9228219aa..78d666720c091 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -1,7 +1,6 @@ from datetime import datetime import numpy as np -from numpy import nan import pytest import pandas as pd @@ -114,8 +113,8 @@ def test_combine_first(self): assert_series_equal(s, result) def test_update(self): - s = Series([1.5, nan, 3.0, 4.0, nan]) - s2 = Series([nan, 3.5, nan, 5.0]) + s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) + s2 = Series([np.nan, 3.5, np.nan, 5.0]) s.update(s2) expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2f09d777e719c..65cbf5fcf91d2 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2,7 +2,6 @@ from datetime import datetime, timedelta import numpy as np -from numpy import nan import numpy.ma as ma import pytest @@ -456,14 +455,14 @@ def test_unordered_compare_equal(self): def test_constructor_maskedarray(self): data = ma.masked_all((3,), dtype=float) result = Series(data) - expected = Series([nan, nan, nan]) + expected = Series([np.nan, np.nan, np.nan]) assert_series_equal(result, expected) data[0] = 0.0 data[2] = 2.0 index = ["a", "b", "c"] result = Series(data, index=index) - expected = Series([0.0, nan, 2.0], index=index) + expected = Series([0.0, np.nan, 2.0], index=index) assert_series_equal(result, expected) data[1] = 1.0 @@ -473,14 +472,14 @@ def test_constructor_maskedarray(self): data = ma.masked_all((3,), dtype=int) result = Series(data) - expected = Series([nan, nan, nan], dtype=float) + expected = Series([np.nan, np.nan, np.nan], dtype=float) assert_series_equal(result, expected) data[0] = 0 data[2] = 2 index = ["a", "b", "c"] result = 
Series(data, index=index) - expected = Series([0, nan, 2], index=index, dtype=float) + expected = Series([0, np.nan, 2], index=index, dtype=float) assert_series_equal(result, expected) data[1] = 1 @@ -490,14 +489,14 @@ def test_constructor_maskedarray(self): data = ma.masked_all((3,), dtype=bool) result = Series(data) - expected = Series([nan, nan, nan], dtype=object) + expected = Series([np.nan, np.nan, np.nan], dtype=object) assert_series_equal(result, expected) data[0] = True data[2] = False index = ["a", "b", "c"] result = Series(data, index=index) - expected = Series([True, nan, False], index=index, dtype=object) + expected = Series([True, np.nan, False], index=index, dtype=object) assert_series_equal(result, expected) data[1] = True @@ -534,7 +533,7 @@ def test_constructor_maskedarray_hardened(self): # Check numpy masked arrays with hard masks -- from GH24574 data = ma.masked_all((3,), dtype=float).harden_mask() result = pd.Series(data) - expected = pd.Series([nan, nan, nan]) + expected = pd.Series([np.nan, np.nan, np.nan]) tm.assert_series_equal(result, expected) def test_series_ctor_plus_datetimeindex(self): @@ -736,14 +735,14 @@ def test_constructor_dtype_datetime64(self): s = Series(iNaT, index=range(5)) assert not isna(s).all() - s = Series(nan, dtype="M8[ns]", index=range(5)) + s = Series(np.nan, dtype="M8[ns]", index=range(5)) assert isna(s).all() s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype="M8[ns]") assert isna(s[1]) assert s.dtype == "M8[ns]" - s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype="M8[ns]") + s = Series([datetime(2001, 1, 2, 0, 0), np.nan], dtype="M8[ns]") assert isna(s[1]) assert s.dtype == "M8[ns]" @@ -1026,7 +1025,7 @@ def test_constructor_periodindex(self): def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} result = Series(d, index=["b", "c", "d", "a"]) - expected = Series([1, 2, nan, 0], index=["b", "c", "d", "a"]) + expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) 
assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index f459ae9e7845d..835514ea724ab 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1,7 +1,6 @@ from datetime import datetime, timedelta import numpy as np -from numpy import nan import pytest import pytz @@ -760,17 +759,17 @@ def test_fillna(self, datetime_series): assert_series_equal(result, expected) def test_fillna_bug(self): - x = Series([nan, 1.0, nan, 3.0, nan], ["z", "a", "b", "c", "d"]) + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) filled = x.fillna(method="ffill") - expected = Series([nan, 1.0, 1.0, 3.0, 3.0], x.index) + expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], x.index) assert_series_equal(filled, expected) filled = x.fillna(method="bfill") - expected = Series([1.0, 1.0, 3.0, 3.0, nan], x.index) + expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], x.index) assert_series_equal(filled, expected) def test_fillna_inplace(self): - x = Series([nan, 1.0, nan, 3.0, nan], ["z", "a", "b", "c", "d"]) + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) y = x.copy() y.fillna(value=0, inplace=True) @@ -916,20 +915,20 @@ def test_valid(self, datetime_series): tm.assert_series_equal(result, ts[pd.notna(ts)]) def test_isna(self): - ser = Series([0, 5.4, 3, nan, -0.001]) + ser = Series([0, 5.4, 3, np.nan, -0.001]) expected = Series([False, False, False, True, False]) tm.assert_series_equal(ser.isna(), expected) - ser = Series(["hi", "", nan]) + ser = Series(["hi", "", np.nan]) expected = Series([False, False, True]) tm.assert_series_equal(ser.isna(), expected) def test_notna(self): - ser = Series([0, 5.4, 3, nan, -0.001]) + ser = Series([0, 5.4, 3, np.nan, -0.001]) expected = Series([True, True, True, False, True]) tm.assert_series_equal(ser.notna(), expected) - ser = Series(["hi", "", nan]) + ser = Series(["hi", "", 
np.nan]) expected = Series([True, True, False]) tm.assert_series_equal(ser.notna(), expected) @@ -1357,35 +1356,39 @@ def test_interp_limit_bad_direction(self): # limit_area introduced GH #16284 def test_interp_limit_area(self): # These tests are for issue #9218 -- fill NaNs in both directions. - s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan]) + s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) - expected = Series([nan, nan, 3.0, 4.0, 5.0, 6.0, 7.0, nan, nan]) + expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit_area="inside") assert_series_equal(result, expected) - expected = Series([nan, nan, 3.0, 4.0, nan, nan, 7.0, nan, nan]) + expected = Series( + [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) result = s.interpolate(method="linear", limit_area="inside", limit=1) - expected = Series([nan, nan, 3.0, 4.0, nan, 6.0, 7.0, nan, nan]) + expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) result = s.interpolate( method="linear", limit_area="inside", limit_direction="both", limit=1 ) assert_series_equal(result, expected) - expected = Series([nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, 7.0]) + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) result = s.interpolate(method="linear", limit_area="outside") assert_series_equal(result, expected) - expected = Series([nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, nan]) + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) result = s.interpolate(method="linear", limit_area="outside", limit=1) - expected = Series([nan, 3.0, 3.0, nan, nan, nan, 7.0, 7.0, nan]) + expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) result = s.interpolate( method="linear", limit_area="outside", limit_direction="both", limit=1 ) assert_series_equal(result, expected) - expected = Series([3.0, 3.0, 3.0, nan, nan, nan, 
7.0, nan, nan]) + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) result = s.interpolate( method="linear", limit_area="outside", direction="backward" ) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index f93e1651c8b10..5dd27e4c20dcf 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -1,7 +1,6 @@ from itertools import chain, product import numpy as np -from numpy import nan import pytest from pandas._libs.algos import Infinity, NegInfinity @@ -16,14 +15,14 @@ class TestSeriesRank(TestData): - s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + s = Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]) results = { - "average": np.array([1.5, 5.5, 7.0, 3.5, nan, 3.5, 1.5, 8.0, nan, 5.5]), - "min": np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), - "max": np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), - "first": np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), - "dense": np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + "average": np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]), + "min": np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]), + "max": np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]), + "first": np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]), + "dense": np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]), } def test_rank(self): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d81ee79418e9c..a5706d8baa614 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -3,7 +3,6 @@ import struct import numpy as np -from numpy import nan from numpy.random import RandomState import pytest @@ -1623,11 +1622,11 @@ def _check(arr): result = libalgos.rank_1d_float64(arr) arr[mask] = np.inf exp = rankdata(arr) - exp[mask] = nan + exp[mask] = np.nan assert_almost_equal(result, exp) - _check(np.array([nan, nan, 5.0, 5.0, 5.0, nan, 1, 2, 3, nan])) - _check(np.array([4.0, nan, 5.0, 5.0, 5.0, nan, 1, 2, 4.0, nan])) + 
_check(np.array([np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan])) + _check(np.array([4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan])) def test_basic(self): exp = np.array([1, 2], dtype=np.float64) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 4a60d3966a9bb..b9a33d130a99c 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1475,17 +1475,14 @@ def test_frame_dict_constructor_empty_series(self): def test_multiindex_na_repr(self): # only an issue with long columns - - from numpy import nan - df3 = DataFrame( { "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, - "B" * 30: {("A", "A0006000", "nuit"): nan}, - "C" * 30: {("A", "A0006000", "nuit"): nan}, - "D" * 30: {("A", "A0006000", "nuit"): nan}, + "B" * 30: {("A", "A0006000", "nuit"): np.nan}, + "C" * 30: {("A", "A0006000", "nuit"): np.nan}, + "D" * 30: {("A", "A0006000", "nuit"): np.nan}, "E" * 30: {("A", "A0006000", "nuit"): "A"}, - "F" * 30: {("A", "A0006000", "nuit"): nan}, + "F" * 30: {("A", "A0006000", "nuit"): np.nan}, } ) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index f64ad8edafbd7..9be35198a5592 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -3,7 +3,6 @@ from itertools import product import numpy as np -from numpy import nan import pytest from pandas import DataFrame, MultiIndex, Series, array, concat, merge @@ -103,7 +102,7 @@ def aggr(func): assert_frame_equal(gr.median(), aggr(np.median)) def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + keys = [[np.nan] * 5 + list(range(100)) + [np.nan] * 5] # orders=True, na_position='last' result = lexsort_indexer(keys, orders=True, na_position="last") exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) @@ -126,7 +125,7 @@ def test_lexsort_indexer(self): def test_nargsort(self): # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 + 
items = [np.nan] * 5 + list(range(100)) + [np.nan] * 5 # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype="O") diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index b50f1a0fd2f2a..53d74f74dc439 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2,7 +2,6 @@ import re import numpy as np -from numpy import nan as NA from numpy.random import randint import pytest @@ -719,40 +718,42 @@ def test_cat_on_filtered_index(self): assert str_multiple.loc[1] == "2011 2 2" def test_count(self): - values = np.array(["foo", "foofoo", NA, "foooofooofommmfoo"], dtype=np.object_) + values = np.array( + ["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=np.object_ + ) result = strings.str_count(values, "f[o]+") - exp = np.array([1, 2, NA, 4]) + exp = np.array([1, 2, np.nan, 4]) tm.assert_numpy_array_equal(result, exp) result = Series(values).str.count("f[o]+") - exp = Series([1, 2, NA, 4]) + exp = Series([1, 2, np.nan, 4]) assert isinstance(result, Series) tm.assert_series_equal(result, exp) # mixed - mixed = ["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0] + mixed = ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0] rs = strings.str_count(mixed, "a") - xp = np.array([1, NA, 0, NA, NA, 0, NA, NA, NA]) + xp = np.array([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) tm.assert_numpy_array_equal(rs, xp) rs = Series(mixed).str.count("a") - xp = Series([1, NA, 0, NA, NA, 0, NA, NA, NA]) + xp = Series([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) def test_contains(self): values = np.array( - ["foo", NA, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_ + ["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_ ) pat = "mmm[_]+" result = strings.str_contains(values, pat) - expected = np.array([False, NA, True, True, False], dtype=np.object_) + expected = np.array([False, 
np.nan, True, True, False], dtype=np.object_) tm.assert_numpy_array_equal(result, expected) result = strings.str_contains(values, pat, regex=False) - expected = np.array([False, NA, False, False, True], dtype=np.object_) + expected = np.array([False, np.nan, False, False, True], dtype=np.object_) tm.assert_numpy_array_equal(result, expected) values = ["foo", "xyz", "fooommm__foo", "mmm_"] @@ -773,18 +774,23 @@ def test_contains(self): tm.assert_numpy_array_equal(result, expected) # mixed - mixed = ["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0] + mixed = ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0] rs = strings.str_contains(mixed, "o") - xp = np.array([False, NA, False, NA, NA, True, NA, NA, NA], dtype=np.object_) + xp = np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) tm.assert_numpy_array_equal(rs, xp) rs = Series(mixed).str.contains("o") - xp = Series([False, NA, False, NA, NA, True, NA, NA, NA]) + xp = Series( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode - values = np.array(["foo", NA, "fooommm__foo", "mmm_"], dtype=np.object_) + values = np.array(["foo", np.nan, "fooommm__foo", "mmm_"], dtype=np.object_) pat = "mmm[_]+" result = strings.str_contains(values, pat) @@ -825,10 +831,10 @@ def test_contains_for_object_category(self): tm.assert_series_equal(result, expected) def test_startswith(self): - values = Series(["om", NA, "foo_nom", "nom", "bar_foo", NA, "foo"]) + values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) result = values.str.startswith("foo") - exp = Series([False, NA, True, False, False, NA, True]) + exp = Series([False, np.nan, True, False, False, np.nan, True]) tm.assert_series_equal(result, exp) result = values.str.startswith("foo", na=True) @@ -836,92 +842,114 @@ def test_startswith(self): # mixed mixed = np.array( - ["a", NA, "b", 
True, datetime.today(), "foo", None, 1, 2.0], + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], dtype=np.object_, ) rs = strings.str_startswith(mixed, "f") - xp = np.array([False, NA, False, NA, NA, True, NA, NA, NA], dtype=np.object_) + xp = np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) tm.assert_numpy_array_equal(rs, xp) rs = Series(mixed).str.startswith("f") assert isinstance(rs, Series) - xp = Series([False, NA, False, NA, NA, True, NA, NA, NA]) + xp = Series( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] + ) tm.assert_series_equal(rs, xp) def test_endswith(self): - values = Series(["om", NA, "foo_nom", "nom", "bar_foo", NA, "foo"]) + values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) result = values.str.endswith("foo") - exp = Series([False, NA, False, False, True, NA, True]) + exp = Series([False, np.nan, False, False, True, np.nan, True]) tm.assert_series_equal(result, exp) result = values.str.endswith("foo", na=False) tm.assert_series_equal(result, exp.fillna(False).astype(bool)) # mixed - mixed = ["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0] + mixed = ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0] rs = strings.str_endswith(mixed, "f") - xp = np.array([False, NA, False, NA, NA, False, NA, NA, NA], dtype=np.object_) + xp = np.array( + [False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan], + dtype=np.object_, + ) tm.assert_numpy_array_equal(rs, xp) rs = Series(mixed).str.endswith("f") - xp = Series([False, NA, False, NA, NA, False, NA, NA, NA]) + xp = Series( + [False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) def test_title(self): - values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) result = values.str.title() - exp = Series(["Foo", "Bar", NA, 
"Blah", "Blurg"]) + exp = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"]) tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None, 1, 2.0]) + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0] + ) mixed = mixed.str.title() - exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) + exp = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) tm.assert_almost_equal(mixed, exp) def test_lower_upper(self): - values = Series(["om", NA, "nom", "nom"]) + values = Series(["om", np.nan, "nom", "nom"]) result = values.str.upper() - exp = Series(["OM", NA, "NOM", "NOM"]) + exp = Series(["OM", np.nan, "NOM", "NOM"]) tm.assert_series_equal(result, exp) result = result.str.lower() tm.assert_series_equal(result, values) # mixed - mixed = Series(["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0]) + mixed = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) mixed = mixed.str.upper() rs = Series(mixed).str.lower() - xp = Series(["a", NA, "b", NA, NA, "foo", NA, NA, NA]) + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) def test_capitalize(self): - values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) result = values.str.capitalize() - exp = Series(["Foo", "Bar", NA, "Blah", "Blurg"]) + exp = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"]) tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None, 1, 2.0]) + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0] + ) mixed = mixed.str.capitalize() - exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) + exp = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) tm.assert_almost_equal(mixed, exp) def 
test_swapcase(self): - values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) result = values.str.swapcase() - exp = Series(["foo", "bar", NA, "bLAH", "BLURG"]) + exp = Series(["foo", "bar", np.nan, "bLAH", "BLURG"]) tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), "Blah", None, 1, 2.0]) + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0] + ) mixed = mixed.str.swapcase() - exp = Series(["foo", NA, "BAR", NA, NA, "bLAH", NA, NA, NA]) + exp = Series( + ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", np.nan, np.nan, np.nan] + ) tm.assert_almost_equal(mixed, exp) def test_casemethods(self): @@ -934,23 +962,23 @@ def test_casemethods(self): assert s.str.swapcase().tolist() == [v.swapcase() for v in values] def test_replace(self): - values = Series(["fooBAD__barBAD", NA]) + values = Series(["fooBAD__barBAD", np.nan]) result = values.str.replace("BAD[_]*", "") - exp = Series(["foobar", NA]) + exp = Series(["foobar", np.nan]) tm.assert_series_equal(result, exp) result = values.str.replace("BAD[_]*", "", n=1) - exp = Series(["foobarBAD", NA]) + exp = Series(["foobarBAD", np.nan]) tm.assert_series_equal(result, exp) # mixed mixed = Series( - ["aBAD", NA, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] + ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] ) rs = Series(mixed).str.replace("BAD[_]*", "") - xp = Series(["a", NA, "b", NA, NA, "foo", NA, NA, NA]) + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -971,12 +999,12 @@ def test_replace(self): def test_replace_callable(self): # GH 15055 - values = Series(["fooBAD__barBAD", NA]) + values = Series(["fooBAD__barBAD", np.nan]) # test with callable repl = lambda m: m.group(0).swapcase() result = values.str.replace("[a-z][A-Z]{2}", repl, n=2) - exp = 
Series(["foObaD__baRbaD", NA]) + exp = Series(["foObaD__baRbaD", np.nan]) tm.assert_series_equal(result, exp) # test with wrong number of arguments, raising an error @@ -998,34 +1026,34 @@ def test_replace_callable(self): values.str.replace("a", repl) # test regex named groups - values = Series(["Foo Bar Baz", NA]) + values = Series(["Foo Bar Baz", np.nan]) pat = r"(?P\w+) (?P\w+) (?P\w+)" repl = lambda m: m.group("middle").swapcase() result = values.str.replace(pat, repl) - exp = Series(["bAR", NA]) + exp = Series(["bAR", np.nan]) tm.assert_series_equal(result, exp) def test_replace_compiled_regex(self): # GH 15446 - values = Series(["fooBAD__barBAD", NA]) + values = Series(["fooBAD__barBAD", np.nan]) # test with compiled regex pat = re.compile(r"BAD[_]*") result = values.str.replace(pat, "") - exp = Series(["foobar", NA]) + exp = Series(["foobar", np.nan]) tm.assert_series_equal(result, exp) result = values.str.replace(pat, "", n=1) - exp = Series(["foobarBAD", NA]) + exp = Series(["foobarBAD", np.nan]) tm.assert_series_equal(result, exp) # mixed mixed = Series( - ["aBAD", NA, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] + ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] ) rs = Series(mixed).str.replace(pat, "") - xp = Series(["a", NA, "b", NA, NA, "foo", NA, NA, NA]) + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -1038,7 +1066,7 @@ def test_replace_compiled_regex(self): # case and flags provided to str.replace will have no effect # and will produce warnings - values = Series(["fooBAD__barBAD__bad", NA]) + values = Series(["fooBAD__barBAD__bad", np.nan]) pat = re.compile(r"BAD[_]*") with pytest.raises(ValueError, match="case and flags cannot be"): @@ -1051,21 +1079,21 @@ def test_replace_compiled_regex(self): result = values.str.replace(pat, "", case=True) # test with callable - values = Series(["fooBAD__barBAD", NA]) + values = 
Series(["fooBAD__barBAD", np.nan]) repl = lambda m: m.group(0).swapcase() pat = re.compile("[a-z][A-Z]{2}") result = values.str.replace(pat, repl, n=2) - exp = Series(["foObaD__baRbaD", NA]) + exp = Series(["foObaD__baRbaD", np.nan]) tm.assert_series_equal(result, exp) def test_replace_literal(self): # GH16808 literal replace (regex=False vs regex=True) - values = Series(["f.o", "foo", NA]) - exp = Series(["bao", "bao", NA]) + values = Series(["f.o", "foo", np.nan]) + exp = Series(["bao", "bao", np.nan]) result = values.str.replace("f.", "ba") tm.assert_series_equal(result, exp) - exp = Series(["bao", "foo", NA]) + exp = Series(["bao", "foo", np.nan]) result = values.str.replace("f.", "ba", regex=False) tm.assert_series_equal(result, exp) @@ -1083,42 +1111,54 @@ def test_replace_literal(self): values.str.replace(compiled_pat, "", regex=False) def test_repeat(self): - values = Series(["a", "b", NA, "c", NA, "d"]) + values = Series(["a", "b", np.nan, "c", np.nan, "d"]) result = values.str.repeat(3) - exp = Series(["aaa", "bbb", NA, "ccc", NA, "ddd"]) + exp = Series(["aaa", "bbb", np.nan, "ccc", np.nan, "ddd"]) tm.assert_series_equal(result, exp) result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series(["a", "bb", NA, "cccc", NA, "dddddd"]) + exp = Series(["a", "bb", np.nan, "cccc", np.nan, "dddddd"]) tm.assert_series_equal(result, exp) # mixed - mixed = Series(["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0]) + mixed = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) rs = Series(mixed).str.repeat(3) - xp = Series(["aaa", NA, "bbb", NA, NA, "foofoofoo", NA, NA, NA]) + xp = Series( + ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) def test_match(self): # New match behavior introduced in 0.13 - values = Series(["fooBAD__barBAD", NA, "foo"]) + values = Series(["fooBAD__barBAD", np.nan, "foo"]) result = values.str.match(".*(BAD[_]+).*(BAD)") 
- exp = Series([True, NA, False]) + exp = Series([True, np.nan, False]) tm.assert_series_equal(result, exp) - values = Series(["fooBAD__barBAD", NA, "foo"]) + values = Series(["fooBAD__barBAD", np.nan, "foo"]) result = values.str.match(".*BAD[_]+.*BAD") - exp = Series([True, NA, False]) + exp = Series([True, np.nan, False]) tm.assert_series_equal(result, exp) # mixed mixed = Series( - ["aBAD_BAD", NA, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0] + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.match(".*(BAD[_]+).*(BAD)") - xp = Series([True, NA, True, NA, NA, False, NA, NA, NA]) + xp = Series([True, np.nan, True, np.nan, np.nan, False, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) @@ -1131,12 +1171,12 @@ def test_match(self): assert_series_equal(exp, res) def test_extract_expand_None(self): - values = Series(["fooBAD__barBAD", NA, "foo"]) + values = Series(["fooBAD__barBAD", np.nan, "foo"]) with pytest.raises(ValueError, match="expand must be True or False"): values.str.extract(".*(BAD[_]+).*(BAD)", expand=None) def test_extract_expand_unspecified(self): - values = Series(["fooBAD__barBAD", NA, "foo"]) + values = Series(["fooBAD__barBAD", np.nan, "foo"]) result_unspecified = values.str.extract(".*(BAD[_]+).*") assert isinstance(result_unspecified, DataFrame) result_true = values.str.extract(".*(BAD[_]+).*", expand=True) @@ -1144,8 +1184,8 @@ def test_extract_expand_unspecified(self): def test_extract_expand_False(self): # Contains tests like those in test_match and some others. 
- values = Series(["fooBAD__barBAD", NA, "foo"]) - er = [NA, NA] # empty row + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + er = [np.nan, np.nan] # empty row result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=False) exp = DataFrame([["BAD__", "BAD"], er, er]) @@ -1153,7 +1193,17 @@ def test_extract_expand_False(self): # mixed mixed = Series( - ["aBAD_BAD", NA, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0] + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.extract(".*(BAD[_]+).*(BAD)", expand=False) @@ -1161,7 +1211,7 @@ def test_extract_expand_False(self): tm.assert_frame_equal(rs, exp) # unicode - values = Series(["fooBAD__barBAD", NA, "foo"]) + values = Series(["fooBAD__barBAD", np.nan, "foo"]) result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=False) exp = DataFrame([["BAD__", "BAD"], er, er]) @@ -1200,51 +1250,55 @@ def test_extract_expand_False(self): s = Series(["A1", "B2", "C3"]) # one group, no matches result = s.str.extract("(_)", expand=False) - exp = Series([NA, NA, NA], dtype=object) + exp = Series([np.nan, np.nan, np.nan], dtype=object) tm.assert_series_equal(result, exp) # two groups, no matches result = s.str.extract("(_)(_)", expand=False) - exp = DataFrame([[NA, NA], [NA, NA], [NA, NA]], dtype=object) + exp = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=object + ) tm.assert_frame_equal(result, exp) # one group, some matches result = s.str.extract("([AB])[123]", expand=False) - exp = Series(["A", "B", NA]) + exp = Series(["A", "B", np.nan]) tm.assert_series_equal(result, exp) # two groups, some matches result = s.str.extract("([AB])([123])", expand=False) - exp = DataFrame([["A", "1"], ["B", "2"], [NA, NA]]) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) tm.assert_frame_equal(result, exp) # one named group result = s.str.extract("(?P[AB])", expand=False) - exp = Series(["A", "B", NA], 
name="letter") + exp = Series(["A", "B", np.nan], name="letter") tm.assert_series_equal(result, exp) # two named groups result = s.str.extract("(?P[AB])(?P[123])", expand=False) exp = DataFrame( - [["A", "1"], ["B", "2"], [NA, NA]], columns=["letter", "number"] + [["A", "1"], ["B", "2"], [np.nan, np.nan]], columns=["letter", "number"] ) tm.assert_frame_equal(result, exp) # mix named and unnamed groups result = s.str.extract("([AB])(?P[123])", expand=False) - exp = DataFrame([["A", "1"], ["B", "2"], [NA, NA]], columns=[0, "number"]) + exp = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], columns=[0, "number"] + ) tm.assert_frame_equal(result, exp) # one normal group, one non-capturing group result = s.str.extract("([AB])(?:[123])", expand=False) - exp = Series(["A", "B", NA]) + exp = Series(["A", "B", np.nan]) tm.assert_series_equal(result, exp) # two normal groups, one non-capturing group result = Series(["A11", "B22", "C33"]).str.extract( "([AB])([123])(?:[123])", expand=False ) - exp = DataFrame([["A", "1"], ["B", "2"], [NA, NA]]) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) tm.assert_frame_equal(result, exp) # one optional group followed by one normal group @@ -1252,7 +1306,7 @@ def test_extract_expand_False(self): "(?P[AB])?(?P[123])", expand=False ) exp = DataFrame( - [["A", "1"], ["B", "2"], [NA, "3"]], columns=["letter", "number"] + [["A", "1"], ["B", "2"], [np.nan, "3"]], columns=["letter", "number"] ) tm.assert_frame_equal(result, exp) @@ -1261,7 +1315,7 @@ def test_extract_expand_False(self): "(?P[ABC])(?P[123])?", expand=False ) exp = DataFrame( - [["A", "1"], ["B", "2"], ["C", NA]], columns=["letter", "number"] + [["A", "1"], ["B", "2"], ["C", np.nan]], columns=["letter", "number"] ) tm.assert_frame_equal(result, exp) @@ -1272,13 +1326,13 @@ def check_index(index): index = index[: len(data)] s = Series(data, index=index) result = s.str.extract(r"(\d)", expand=False) - exp = Series(["1", "2", NA], index=index) + exp = 
Series(["1", "2", np.nan], index=index) tm.assert_series_equal(result, exp) result = Series(data, index=index).str.extract( r"(?P\D)(?P\d)?", expand=False ) - e_list = [["A", "1"], ["B", "2"], ["C", NA]] + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] exp = DataFrame(e_list, columns=["letter", "number"], index=index) tm.assert_frame_equal(result, exp) @@ -1302,8 +1356,8 @@ def check_index(index): def test_extract_expand_True(self): # Contains tests like those in test_match and some others. - values = Series(["fooBAD__barBAD", NA, "foo"]) - er = [NA, NA] # empty row + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + er = [np.nan, np.nan] # empty row result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=True) exp = DataFrame([["BAD__", "BAD"], er, er]) @@ -1311,7 +1365,17 @@ def test_extract_expand_True(self): # mixed mixed = Series( - ["aBAD_BAD", NA, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0] + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.extract(".*(BAD[_]+).*(BAD)", expand=True) @@ -1344,32 +1408,34 @@ def test_extract_series(self): s = Series(["A1", "B2", "C3"], name=series_name) # one group, no matches result = s.str.extract("(_)", expand=True) - exp = DataFrame([NA, NA, NA], dtype=object) + exp = DataFrame([np.nan, np.nan, np.nan], dtype=object) tm.assert_frame_equal(result, exp) # two groups, no matches result = s.str.extract("(_)(_)", expand=True) - exp = DataFrame([[NA, NA], [NA, NA], [NA, NA]], dtype=object) + exp = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=object + ) tm.assert_frame_equal(result, exp) # one group, some matches result = s.str.extract("([AB])[123]", expand=True) - exp = DataFrame(["A", "B", NA]) + exp = DataFrame(["A", "B", np.nan]) tm.assert_frame_equal(result, exp) # two groups, some matches result = s.str.extract("([AB])([123])", expand=True) - exp = DataFrame([["A", "1"], ["B", "2"], [NA, NA]]) + 
exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) tm.assert_frame_equal(result, exp) # one named group result = s.str.extract("(?P[AB])", expand=True) - exp = DataFrame({"letter": ["A", "B", NA]}) + exp = DataFrame({"letter": ["A", "B", np.nan]}) tm.assert_frame_equal(result, exp) # two named groups result = s.str.extract("(?P[AB])(?P[123])", expand=True) - e_list = [["A", "1"], ["B", "2"], [NA, NA]] + e_list = [["A", "1"], ["B", "2"], [np.nan, np.nan]] exp = DataFrame(e_list, columns=["letter", "number"]) tm.assert_frame_equal(result, exp) @@ -1380,7 +1446,7 @@ def test_extract_series(self): # one normal group, one non-capturing group result = s.str.extract("([AB])(?:[123])", expand=True) - exp = DataFrame(["A", "B", NA]) + exp = DataFrame(["A", "B", np.nan]) tm.assert_frame_equal(result, exp) def test_extract_optional_groups(self): @@ -1389,14 +1455,14 @@ def test_extract_optional_groups(self): result = Series(["A11", "B22", "C33"]).str.extract( "([AB])([123])(?:[123])", expand=True ) - exp = DataFrame([["A", "1"], ["B", "2"], [NA, NA]]) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) tm.assert_frame_equal(result, exp) # one optional group followed by one normal group result = Series(["A1", "B2", "3"]).str.extract( "(?P[AB])?(?P[123])", expand=True ) - e_list = [["A", "1"], ["B", "2"], [NA, "3"]] + e_list = [["A", "1"], ["B", "2"], [np.nan, "3"]] exp = DataFrame(e_list, columns=["letter", "number"]) tm.assert_frame_equal(result, exp) @@ -1404,7 +1470,7 @@ def test_extract_optional_groups(self): result = Series(["A1", "B2", "C"]).str.extract( "(?P[ABC])(?P[123])?", expand=True ) - e_list = [["A", "1"], ["B", "2"], ["C", NA]] + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] exp = DataFrame(e_list, columns=["letter", "number"]) tm.assert_frame_equal(result, exp) @@ -1414,13 +1480,13 @@ def check_index(index): data = ["A1", "B2", "C"] index = index[: len(data)] result = Series(data, index=index).str.extract(r"(\d)", expand=True) - exp = 
DataFrame(["1", "2", NA], index=index) + exp = DataFrame(["1", "2", np.nan], index=index) tm.assert_frame_equal(result, exp) result = Series(data, index=index).str.extract( r"(?P\D)(?P\d)?", expand=True ) - e_list = [["A", "1"], ["B", "2"], ["C", NA]] + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] exp = DataFrame(e_list, columns=["letter", "number"], index=index) tm.assert_frame_equal(result, exp) @@ -1530,7 +1596,7 @@ def test_extractall(self): [(1, 0), (2, 0), (2, 1)], names=(None, "match") ) expected_df = DataFrame( - [("A", "1"), (NA, "3"), (NA, "2")], + [("A", "1"), (np.nan, "3"), (np.nan, "2")], expected_index, columns=["letter", "number"], ) @@ -1540,7 +1606,9 @@ def test_extractall(self): pattern = "([AB])?(?P[123])" computed_df = Series(subject_list).str.extractall(pattern) expected_df = DataFrame( - [("A", "1"), (NA, "3"), (NA, "2")], expected_index, columns=[0, "number"] + [("A", "1"), (np.nan, "3"), (np.nan, "2")], + expected_index, + columns=[0, "number"], ) tm.assert_frame_equal(computed_df, expected_df) @@ -1918,11 +1986,33 @@ def test_join(self): # mixed mixed = Series( - ["a_b", NA, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0] + [ + "a_b", + np.nan, + "asdf_cas_asdf", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.split("_").str.join("_") - xp = Series(["a_b", NA, "asdf_cas_asdf", NA, NA, "foo", NA, NA, NA]) + xp = Series( + [ + "a_b", + np.nan, + "asdf_cas_asdf", + np.nan, + np.nan, + "foo", + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -1931,34 +2021,66 @@ def test_len(self): values = Series(["foo", "fooo", "fooooo", np.nan, "fooooooo"]) result = values.str.len() - exp = values.map(lambda x: len(x) if notna(x) else NA) + exp = values.map(lambda x: len(x) if notna(x) else np.nan) tm.assert_series_equal(result, exp) # mixed mixed = Series( - ["a_b", NA, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0] + [ + "a_b", + 
np.nan, + "asdf_cas_asdf", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.len() - xp = Series([3, NA, 13, NA, NA, 3, NA, NA, NA]) + xp = Series([3, np.nan, 13, np.nan, np.nan, 3, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_findall(self): - values = Series(["fooBAD__barBAD", NA, "foo", "BAD"]) + values = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"]) result = values.str.findall("BAD[_]*") - exp = Series([["BAD__", "BAD"], NA, [], ["BAD"]]) + exp = Series([["BAD__", "BAD"], np.nan, [], ["BAD"]]) tm.assert_almost_equal(result, exp) # mixed mixed = Series( - ["fooBAD__barBAD", NA, "foo", True, datetime.today(), "BAD", None, 1, 2.0] + [ + "fooBAD__barBAD", + np.nan, + "foo", + True, + datetime.today(), + "BAD", + None, + 1, + 2.0, + ] ) rs = Series(mixed).str.findall("BAD[_]*") - xp = Series([["BAD__", "BAD"], NA, [], NA, NA, ["BAD"], NA, NA, NA]) + xp = Series( + [ + ["BAD__", "BAD"], + np.nan, + [], + np.nan, + np.nan, + ["BAD"], + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -2078,59 +2200,65 @@ def _check(result, expected): tm.assert_series_equal(result, Series([3, 1, 2, np.nan])) def test_pad(self): - values = Series(["a", "b", NA, "c", NA, "eeeeee"]) + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) result = values.str.pad(5, side="left") - exp = Series([" a", " b", NA, " c", NA, "eeeeee"]) + exp = Series([" a", " b", np.nan, " c", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side="right") - exp = Series(["a ", "b ", NA, "c ", NA, "eeeeee"]) + exp = Series(["a ", "b ", np.nan, "c ", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side="both") - exp = Series([" a ", " b ", NA, " c ", NA, "eeeeee"]) + exp = Series([" a ", " b ", np.nan, " c ", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) # mixed - mixed = Series(["a", NA, "b", 
True, datetime.today(), "ee", None, 1, 2.0]) + mixed = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) rs = Series(mixed).str.pad(5, side="left") - xp = Series([" a", NA, " b", NA, NA, " ee", NA, NA, NA]) + xp = Series( + [" a", np.nan, " b", np.nan, np.nan, " ee", np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - mixed = Series(["a", NA, "b", True, datetime.today(), "ee", None, 1, 2.0]) + mixed = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) rs = Series(mixed).str.pad(5, side="right") - xp = Series(["a ", NA, "b ", NA, NA, "ee ", NA, NA, NA]) + xp = Series( + ["a ", np.nan, "b ", np.nan, np.nan, "ee ", np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - mixed = Series(["a", NA, "b", True, datetime.today(), "ee", None, 1, 2.0]) + mixed = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) rs = Series(mixed).str.pad(5, side="both") - xp = Series([" a ", NA, " b ", NA, NA, " ee ", NA, NA, NA]) + xp = Series( + [" a ", np.nan, " b ", np.nan, np.nan, " ee ", np.nan, np.nan, np.nan] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_pad_fillchar(self): - values = Series(["a", "b", NA, "c", NA, "eeeeee"]) + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) result = values.str.pad(5, side="left", fillchar="X") - exp = Series(["XXXXa", "XXXXb", NA, "XXXXc", NA, "eeeeee"]) + exp = Series(["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side="right", fillchar="X") - exp = Series(["aXXXX", "bXXXX", NA, "cXXXX", NA, "eeeeee"]) + exp = Series(["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side="both", fillchar="X") - exp = Series(["XXaXX", "XXbXX", NA, "XXcXX", NA, "eeeeee"]) + exp = Series(["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"]) 
tm.assert_almost_equal(result, exp) msg = "fillchar must be a character, not str" @@ -2171,35 +2299,76 @@ def _check(result, expected): tm.assert_series_equal(result, expected) def test_center_ljust_rjust(self): - values = Series(["a", "b", NA, "c", NA, "eeeeee"]) + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) result = values.str.center(5) - exp = Series([" a ", " b ", NA, " c ", NA, "eeeeee"]) + exp = Series([" a ", " b ", np.nan, " c ", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.ljust(5) - exp = Series(["a ", "b ", NA, "c ", NA, "eeeeee"]) + exp = Series(["a ", "b ", np.nan, "c ", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) result = values.str.rjust(5) - exp = Series([" a", " b", NA, " c", NA, "eeeeee"]) + exp = Series([" a", " b", np.nan, " c", np.nan, "eeeeee"]) tm.assert_almost_equal(result, exp) # mixed - mixed = Series(["a", NA, "b", True, datetime.today(), "c", "eee", None, 1, 2.0]) + mixed = Series( + ["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0] + ) rs = Series(mixed).str.center(5) - xp = Series([" a ", NA, " b ", NA, NA, " c ", " eee ", NA, NA, NA]) + xp = Series( + [ + " a ", + np.nan, + " b ", + np.nan, + np.nan, + " c ", + " eee ", + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.ljust(5) - xp = Series(["a ", NA, "b ", NA, NA, "c ", "eee ", NA, NA, NA]) + xp = Series( + [ + "a ", + np.nan, + "b ", + np.nan, + np.nan, + "c ", + "eee ", + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rjust(5) - xp = Series([" a", NA, " b", NA, NA, " c", " eee", NA, NA, NA]) + xp = Series( + [ + " a", + np.nan, + " b", + np.nan, + np.nan, + " c", + " eee", + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -2268,14 +2437,14 @@ def test_zfill(self): tm.assert_series_equal(result, expected) def 
test_split(self): - values = Series(["a_b_c", "c_d_e", NA, "f_g_h"]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = values.str.split("_") - exp = Series([["a", "b", "c"], ["c", "d", "e"], NA, ["f", "g", "h"]]) + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) # more than one char - values = Series(["a__b__c", "c__d__e", NA, "f__g__h"]) + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"]) result = values.str.split("__") tm.assert_series_equal(result, exp) @@ -2283,9 +2452,20 @@ def test_split(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(["a_b_c", NA, "d_e_f", True, datetime.today(), None, 1, 2.0]) + mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) result = mixed.str.split("_") - exp = Series([["a", "b", "c"], NA, ["d", "e", "f"], NA, NA, NA, NA, NA]) + exp = Series( + [ + ["a", "b", "c"], + np.nan, + ["d", "e", "f"], + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(result, Series) tm.assert_almost_equal(result, exp) @@ -2294,19 +2474,19 @@ def test_split(self): tm.assert_almost_equal(result, exp) # regex split - values = Series(["a,b_c", "c_d,e", NA, "f,g,h"]) + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"]) result = values.str.split("[,_]") - exp = Series([["a", "b", "c"], ["c", "d", "e"], NA, ["f", "g", "h"]]) + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) def test_rsplit(self): - values = Series(["a_b_c", "c_d_e", NA, "f_g_h"]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = values.str.rsplit("_") - exp = Series([["a", "b", "c"], ["c", "d", "e"], NA, ["f", "g", "h"]]) + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) # more than one char - values = Series(["a__b__c", "c__d__e", NA, "f__g__h"]) + values = Series(["a__b__c", "c__d__e", np.nan, 
"f__g__h"]) result = values.str.rsplit("__") tm.assert_series_equal(result, exp) @@ -2314,9 +2494,20 @@ def test_rsplit(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(["a_b_c", NA, "d_e_f", True, datetime.today(), None, 1, 2.0]) + mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) result = mixed.str.rsplit("_") - exp = Series([["a", "b", "c"], NA, ["d", "e", "f"], NA, NA, NA, NA, NA]) + exp = Series( + [ + ["a", "b", "c"], + np.nan, + ["d", "e", "f"], + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ] + ) assert isinstance(result, Series) tm.assert_almost_equal(result, exp) @@ -2325,15 +2516,15 @@ def test_rsplit(self): tm.assert_almost_equal(result, exp) # regex split is not supported by rsplit - values = Series(["a,b_c", "c_d,e", NA, "f,g,h"]) + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"]) result = values.str.rsplit("[,_]") - exp = Series([["a,b_c"], ["c_d,e"], NA, ["f,g,h"]]) + exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]]) tm.assert_series_equal(result, exp) # setting max number of splits, make sure it's from reverse - values = Series(["a_b_c", "c_d_e", NA, "f_g_h"]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = values.str.rsplit("_", n=1) - exp = Series([["a_b", "c"], ["c_d", "e"], NA, ["f_g", "h"]]) + exp = Series([["a_b", "c"], ["c_d", "e"], np.nan, ["f_g", "h"]]) tm.assert_series_equal(result, exp) def test_split_blank_string(self): @@ -2408,9 +2599,9 @@ def test_split_to_dataframe(self): 0: ["some", "one"], 1: ["unequal", "of"], 2: ["splits", "these"], - 3: [NA, "things"], - 4: [NA, "is"], - 5: [NA, "not"], + 3: [np.nan, "things"], + 4: [np.nan, "is"], + 5: [np.nan, "not"], } ) tm.assert_frame_equal(result, exp) @@ -2451,7 +2642,7 @@ def test_split_to_multiindex_expand(self): result = idx.str.split("_", expand=True) exp = MultiIndex.from_tuples( [ - ("some", "unequal", "splits", NA, NA, NA), + ("some", "unequal", "splits", np.nan, np.nan, np.nan), ("one", "of", 
"these", "things", "is", "not"), (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan), (None, None, None, None, None, None), @@ -2516,9 +2707,9 @@ def test_rsplit_to_multiindex_expand(self): def test_split_nan_expand(self): # gh-18450 - s = Series(["foo,bar,baz", NA]) + s = Series(["foo,bar,baz", np.nan]) result = s.str.split(",", expand=True) - exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]]) + exp = DataFrame([["foo", "bar", "baz"], [np.nan, np.nan, np.nan]]) tm.assert_frame_equal(result, exp) # check that these are actually np.nan and not None @@ -2553,67 +2744,79 @@ def test_split_with_name(self): def test_partition_series(self): # https://github.com/pandas-dev/pandas/issues/23558 - values = Series(["a_b_c", "c_d_e", NA, "f_g_h", None]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) result = values.str.partition("_", expand=False) exp = Series( - [("a", "_", "b_c"), ("c", "_", "d_e"), NA, ("f", "_", "g_h"), None] + [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h"), None] ) tm.assert_series_equal(result, exp) result = values.str.rpartition("_", expand=False) exp = Series( - [("a_b", "_", "c"), ("c_d", "_", "e"), NA, ("f_g", "_", "h"), None] + [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h"), None] ) tm.assert_series_equal(result, exp) # more than one char - values = Series(["a__b__c", "c__d__e", NA, "f__g__h", None]) + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None]) result = values.str.partition("__", expand=False) exp = Series( - [("a", "__", "b__c"), ("c", "__", "d__e"), NA, ("f", "__", "g__h"), None] + [ + ("a", "__", "b__c"), + ("c", "__", "d__e"), + np.nan, + ("f", "__", "g__h"), + None, + ] ) tm.assert_series_equal(result, exp) result = values.str.rpartition("__", expand=False) exp = Series( - [("a__b", "__", "c"), ("c__d", "__", "e"), NA, ("f__g", "__", "h"), None] + [ + ("a__b", "__", "c"), + ("c__d", "__", "e"), + np.nan, + ("f__g", "__", "h"), + None, + ] ) tm.assert_series_equal(result, 
exp) # None - values = Series(["a b c", "c d e", NA, "f g h", None]) + values = Series(["a b c", "c d e", np.nan, "f g h", None]) result = values.str.partition(expand=False) exp = Series( - [("a", " ", "b c"), ("c", " ", "d e"), NA, ("f", " ", "g h"), None] + [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None] ) tm.assert_series_equal(result, exp) result = values.str.rpartition(expand=False) exp = Series( - [("a b", " ", "c"), ("c d", " ", "e"), NA, ("f g", " ", "h"), None] + [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None] ) tm.assert_series_equal(result, exp) # Not split - values = Series(["abc", "cde", NA, "fgh", None]) + values = Series(["abc", "cde", np.nan, "fgh", None]) result = values.str.partition("_", expand=False) - exp = Series([("abc", "", ""), ("cde", "", ""), NA, ("fgh", "", ""), None]) + exp = Series([("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None]) tm.assert_series_equal(result, exp) result = values.str.rpartition("_", expand=False) - exp = Series([("", "", "abc"), ("", "", "cde"), NA, ("", "", "fgh"), None]) + exp = Series([("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None]) tm.assert_series_equal(result, exp) # unicode - values = Series(["a_b_c", "c_d_e", NA, "f_g_h"]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = values.str.partition("_", expand=False) - exp = Series([("a", "_", "b_c"), ("c", "_", "d_e"), NA, ("f", "_", "g_h")]) + exp = Series([("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")]) tm.assert_series_equal(result, exp) result = values.str.rpartition("_", expand=False) - exp = Series([("a_b", "_", "c"), ("c_d", "_", "e"), NA, ("f_g", "_", "h")]) + exp = Series([("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")]) tm.assert_series_equal(result, exp) # compare to standard lib @@ -2677,7 +2880,7 @@ def test_partition_index(self): def test_partition_to_dataframe(self): # https://github.com/pandas-dev/pandas/issues/23558 - 
values = Series(["a_b_c", "c_d_e", NA, "f_g_h", None]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) result = values.str.partition("_") exp = DataFrame( { @@ -2698,7 +2901,7 @@ def test_partition_to_dataframe(self): ) tm.assert_frame_equal(result, exp) - values = Series(["a_b_c", "c_d_e", NA, "f_g_h", None]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) result = values.str.partition("_", expand=True) exp = DataFrame( { @@ -2746,7 +2949,7 @@ def test_partition_with_name(self): def test_partition_deprecation(self): # GH 22676; depr kwarg "pat" in favor of "sep" - values = Series(["a_b_c", "c_d_e", NA, "f_g_h"]) + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) # str.partition # using sep -> no warning @@ -2779,100 +2982,102 @@ def test_pipe_failures(self): @pytest.mark.parametrize( "start, stop, step, expected", [ - (2, 5, None, Series(["foo", "bar", NA, "baz"])), - (0, 3, -1, Series(["", "", NA, ""])), - (None, None, -1, Series(["owtoofaa", "owtrabaa", NA, "xuqzabaa"])), - (3, 10, 2, Series(["oto", "ato", NA, "aqx"])), - (3, 0, -1, Series(["ofa", "aba", NA, "aba"])), + (2, 5, None, Series(["foo", "bar", np.nan, "baz"])), + (0, 3, -1, Series(["", "", np.nan, ""])), + (None, None, -1, Series(["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"])), + (3, 10, 2, Series(["oto", "ato", np.nan, "aqx"])), + (3, 0, -1, Series(["ofa", "aba", np.nan, "aba"])), ], ) def test_slice(self, start, stop, step, expected): - values = Series(["aafootwo", "aabartwo", NA, "aabazqux"]) + values = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"]) result = values.str.slice(start, stop, step) tm.assert_series_equal(result, expected) # mixed mixed = Series( - ["aafootwo", NA, "aabartwo", True, datetime.today(), None, 1, 2.0] + ["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0] ) rs = Series(mixed).str.slice(2, 5) - xp = Series(["foo", NA, "bar", NA, NA, NA, NA, NA]) + xp = Series(["foo", np.nan, "bar", np.nan, np.nan, np.nan, np.nan, np.nan]) 
assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.slice(2, 5, -1) - xp = Series(["oof", NA, "rab", NA, NA, NA, NA, NA]) + xp = Series(["oof", np.nan, "rab", np.nan, np.nan, np.nan, np.nan, np.nan]) def test_slice_replace(self): - values = Series(["short", "a bit longer", "evenlongerthanthat", "", NA]) + values = Series(["short", "a bit longer", "evenlongerthanthat", "", np.nan]) - exp = Series(["shrt", "a it longer", "evnlongerthanthat", "", NA]) + exp = Series(["shrt", "a it longer", "evnlongerthanthat", "", np.nan]) result = values.str.slice_replace(2, 3) tm.assert_series_equal(result, exp) - exp = Series(["shzrt", "a zit longer", "evznlongerthanthat", "z", NA]) + exp = Series(["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 3, "z") tm.assert_series_equal(result, exp) - exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", NA]) + exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 2, "z") tm.assert_series_equal(result, exp) - exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", NA]) + exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) result = values.str.slice_replace(2, 1, "z") tm.assert_series_equal(result, exp) - exp = Series(["shorz", "a bit longez", "evenlongerthanthaz", "z", NA]) + exp = Series(["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan]) result = values.str.slice_replace(-1, None, "z") tm.assert_series_equal(result, exp) - exp = Series(["zrt", "zer", "zat", "z", NA]) + exp = Series(["zrt", "zer", "zat", "z", np.nan]) result = values.str.slice_replace(None, -2, "z") tm.assert_series_equal(result, exp) - exp = Series(["shortz", "a bit znger", "evenlozerthanthat", "z", NA]) + exp = Series(["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan]) result = values.str.slice_replace(6, 8, "z") tm.assert_series_equal(result, exp) - exp = 
Series(["zrt", "a zit longer", "evenlongzerthanthat", "z", NA]) + exp = Series(["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan]) result = values.str.slice_replace(-10, 3, "z") tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip(self): - values = Series([" aa ", " bb \n", NA, "cc "]) + values = Series([" aa ", " bb \n", np.nan, "cc "]) result = values.str.strip() - exp = Series(["aa", "bb", NA, "cc"]) + exp = Series(["aa", "bb", np.nan, "cc"]) tm.assert_series_equal(result, exp) result = values.str.lstrip() - exp = Series(["aa ", "bb \n", NA, "cc "]) + exp = Series(["aa ", "bb \n", np.nan, "cc "]) tm.assert_series_equal(result, exp) result = values.str.rstrip() - exp = Series([" aa", " bb", NA, "cc"]) + exp = Series([" aa", " bb", np.nan, "cc"]) tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip_mixed(self): # mixed - mixed = Series([" aa ", NA, " bb \t\n", True, datetime.today(), None, 1, 2.0]) + mixed = Series( + [" aa ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0] + ) rs = Series(mixed).str.strip() - xp = Series(["aa", NA, "bb", NA, NA, NA, NA, NA]) + xp = Series(["aa", np.nan, "bb", np.nan, np.nan, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.lstrip() - xp = Series(["aa ", NA, "bb \t\n", NA, NA, NA, NA, NA]) + xp = Series(["aa ", np.nan, "bb \t\n", np.nan, np.nan, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rstrip() - xp = Series([" aa", NA, " bb", NA, NA, NA, NA, NA]) + xp = Series([" aa", np.nan, " bb", np.nan, np.nan, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -2932,7 +3137,7 @@ def test_wrap(self): # test with pre and post whitespace (non-unicode), NaN, and non-ascii # Unicode values = Series([" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"]) - xp = Series([" pre", NA, "\xac\u20ac\U00008000 ab\nadcafe"]) + xp = Series([" pre", np.nan, 
"\xac\u20ac\U00008000 ab\nadcafe"]) rs = values.str.wrap(6) assert_series_equal(rs, xp) @@ -2944,10 +3149,10 @@ def test_get(self): tm.assert_series_equal(result, expected) # mixed - mixed = Series(["a_b_c", NA, "c_d_e", True, datetime.today(), None, 1, 2.0]) + mixed = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0]) rs = Series(mixed).str.split("_").str.get(1) - xp = Series(["b", NA, "d", NA, NA, NA, NA, NA]) + xp = Series(["b", np.nan, "d", np.nan, np.nan, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -2991,7 +3196,7 @@ def test_get_complex_nested(self, to_type): def test_contains_moar(self): # PR #1179 - s = Series(["A", "B", "C", "Aaba", "Baca", "", NA, "CABA", "dog", "cat"]) + s = Series(["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"]) result = s.str.contains("a") expected = Series( @@ -3045,11 +3250,11 @@ def test_contains_nan(self): def test_replace_moar(self): # PR #1179 - s = Series(["A", "B", "C", "Aaba", "Baca", "", NA, "CABA", "dog", "cat"]) + s = Series(["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"]) result = s.str.replace("A", "YYY") expected = Series( - ["YYY", "B", "C", "YYYaba", "Baca", "", NA, "CYYYBYYY", "dog", "cat"] + ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"] ) assert_series_equal(result, expected) @@ -3062,7 +3267,7 @@ def test_replace_moar(self): "YYYYYYbYYY", "BYYYcYYY", "", - NA, + np.nan, "CYYYBYYY", "dog", "cYYYt", @@ -3079,7 +3284,7 @@ def test_replace_moar(self): "XX-XX ba", "XX-XX ca", "", - NA, + np.nan, "XX-XX BA", "XX-XX ", "XX-XX t", @@ -3089,7 +3294,17 @@ def test_replace_moar(self): def test_string_slice_get_syntax(self): s = Series( - ["YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY", NA, "CYYYBYYY", "dog", "cYYYt"] + [ + "YYY", + "B", + "C", + "YYYYYYbYYY", + "BYYYcYYY", + np.nan, + "CYYYBYYY", + "dog", + "cYYYt", + ] ) result = s.str[0] @@ -3266,8 +3481,8 @@ def test_method_on_bytes(self): def 
test_casefold(self): # GH25405 - expected = Series(["ss", NA, "case", "ssd"]) - s = Series(["ß", NA, "case", "ßd"]) + expected = Series(["ss", np.nan, "case", "ssd"]) + s = Series(["ß", np.nan, "case", "ßd"]) result = s.str.casefold() tm.assert_series_equal(result, expected)