Skip to content

Commit b314627

Browse files
authored
CLN: TODOs and FIXMEs (#44455)
1 parent a56f6ee commit b314627

File tree

19 files changed, with 205 additions (+205) and 155 deletions (-155).

pandas/core/array_algos/quantile.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
import numpy as np
66

7-
from pandas._typing import ArrayLike
7+
from pandas._typing import (
8+
ArrayLike,
9+
npt,
10+
)
811

912
from pandas.core.dtypes.common import is_sparse
1013
from pandas.core.dtypes.missing import (
@@ -18,7 +21,9 @@
1821
from pandas.core.arrays import ExtensionArray
1922

2023

21-
def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> ArrayLike:
24+
def quantile_compat(
25+
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
26+
) -> ArrayLike:
2227
"""
2328
Compute the quantiles of the given values for each quantile in `qs`.
2429
@@ -55,7 +60,7 @@ def _quantile_with_mask(
5560
values: np.ndarray,
5661
mask: np.ndarray,
5762
fill_value,
58-
qs: np.ndarray,
63+
qs: npt.NDArray[np.float64],
5964
interpolation: str,
6065
) -> np.ndarray:
6166
"""
@@ -112,7 +117,7 @@ def _quantile_with_mask(
112117

113118

114119
def _quantile_ea_compat(
115-
values: ExtensionArray, qs: np.ndarray, interpolation: str
120+
values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
116121
) -> ExtensionArray:
117122
"""
118123
ExtensionArray compatibility layer for _quantile_with_mask.
@@ -158,7 +163,7 @@ def _quantile_ea_compat(
158163

159164

160165
def _quantile_ea_fallback(
161-
values: ExtensionArray, qs: np.ndarray, interpolation: str
166+
values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
162167
) -> ExtensionArray:
163168
"""
164169
quantile compatibility for ExtensionArray subclasses that do not

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,7 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:
15841584
value = try_datetime(v) # type: ignore[assignment]
15851585

15861586
if value.dtype.kind in ["m", "M"] and seen_str:
1587+
# TODO(2.0): enforcing this deprecation should close GH#40111
15871588
warnings.warn(
15881589
f"Inferring {value.dtype} from data containing strings is deprecated "
15891590
"and will be removed in a future version. To retain the old behavior "

pandas/core/indexing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1902,11 +1902,13 @@ def _setitem_single_block(self, indexer, value, name: str):
19021902
):
19031903
col = item_labels[indexer[info_axis]]
19041904
if len(item_labels.get_indexer_for([col])) == 1:
1905+
# e.g. test_loc_setitem_empty_append_expands_rows
19051906
loc = item_labels.get_loc(col)
19061907
self.obj._iset_item(loc, value, inplace=True)
19071908
return
19081909

1909-
indexer = maybe_convert_ix(*indexer)
1910+
indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align
1911+
19101912
if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
19111913
# TODO(EA): ExtensionBlock.setitem this causes issues with
19121914
# setting for extensionarrays that store dicts. Need to decide

pandas/core/internals/blocks.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,10 +930,7 @@ def setitem(self, indexer, value):
930930
value = setitem_datetimelike_compat(values, len(values[indexer]), value)
931931
values[indexer] = value
932932

933-
if transpose:
934-
values = values.T
935-
block = type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
936-
return block
933+
return self
937934

938935
def putmask(self, mask, new) -> list[Block]:
939936
"""
@@ -961,9 +958,7 @@ def putmask(self, mask, new) -> list[Block]:
961958
new = self.fill_value
962959

963960
if self._can_hold_element(new):
964-
# error: Argument 1 to "putmask_without_repeat" has incompatible type
965-
# "Union[ndarray, ExtensionArray]"; expected "ndarray"
966-
putmask_without_repeat(self.values.T, mask, new) # type: ignore[arg-type]
961+
putmask_without_repeat(values.T, mask, new)
967962
return [self]
968963

969964
elif noop:

pandas/core/internals/managers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,9 +861,9 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
861861
"""
862862
# We have 6 tests that get here with a slice
863863
indexer = (
864-
np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
864+
np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp)
865865
if isinstance(indexer, slice)
866-
else np.asanyarray(indexer, dtype="int64")
866+
else np.asanyarray(indexer, dtype=np.intp)
867867
)
868868

869869
n = self.shape[axis]

pandas/core/ops/missing.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from pandas.core.ops import roperator
3535

3636

37-
def fill_zeros(result, x, y):
37+
def _fill_zeros(result, x, y):
3838
"""
3939
If this is a reversed op, then flip x,y
4040
@@ -102,9 +102,6 @@ def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
102102
>>> mask_zero_div_zero(x, y, result)
103103
array([ inf, nan, -inf])
104104
"""
105-
if not isinstance(result, np.ndarray):
106-
# FIXME: SparseArray would raise TypeError with np.putmask
107-
return result
108105

109106
if is_scalar(y):
110107
y = np.array(y)
@@ -141,7 +138,7 @@ def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
141138

142139
def dispatch_fill_zeros(op, left, right, result):
143140
"""
144-
Call fill_zeros with the appropriate fill value depending on the operation,
141+
Call _fill_zeros with the appropriate fill value depending on the operation,
145142
with special logic for divmod and rdivmod.
146143
147144
Parameters
@@ -163,12 +160,12 @@ def dispatch_fill_zeros(op, left, right, result):
163160
if op is divmod:
164161
result = (
165162
mask_zero_div_zero(left, right, result[0]),
166-
fill_zeros(result[1], left, right),
163+
_fill_zeros(result[1], left, right),
167164
)
168165
elif op is roperator.rdivmod:
169166
result = (
170167
mask_zero_div_zero(right, left, result[0]),
171-
fill_zeros(result[1], right, left),
168+
_fill_zeros(result[1], right, left),
172169
)
173170
elif op is operator.floordiv:
174171
# Note: no need to do this for truediv; in py3 numpy behaves the way
@@ -179,7 +176,7 @@ def dispatch_fill_zeros(op, left, right, result):
179176
# we want.
180177
result = mask_zero_div_zero(right, left, result)
181178
elif op is operator.mod:
182-
result = fill_zeros(result, left, right)
179+
result = _fill_zeros(result, left, right)
183180
elif op is roperator.rmod:
184-
result = fill_zeros(result, right, left)
181+
result = _fill_zeros(result, right, left)
185182
return result

pandas/tests/base/test_value_counts.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import collections
22
from datetime import timedelta
3-
from io import StringIO
43

54
import numpy as np
65
import pytest
@@ -190,19 +189,21 @@ def test_value_counts_datetime64(index_or_series):
190189

191190
# GH 3002, datetime64[ns]
192191
# don't test names though
193-
txt = "\n".join(
194-
[
195-
"xxyyzz20100101PIE",
196-
"xxyyzz20100101GUM",
197-
"xxyyzz20100101EGG",
198-
"xxyyww20090101EGG",
199-
"foofoo20080909PIE",
200-
"foofoo20080909GUM",
201-
]
202-
)
203-
f = StringIO(txt)
204-
df = pd.read_fwf(
205-
f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]
192+
df = pd.DataFrame(
193+
{
194+
"person_id": ["xxyyzz", "xxyyzz", "xxyyzz", "xxyyww", "foofoo", "foofoo"],
195+
"dt": pd.to_datetime(
196+
[
197+
"2010-01-01",
198+
"2010-01-01",
199+
"2010-01-01",
200+
"2009-01-01",
201+
"2008-09-09",
202+
"2008-09-09",
203+
]
204+
),
205+
"food": ["PIE", "GUM", "EGG", "EGG", "PIE", "GUM"],
206+
}
206207
)
207208

208209
s = klass(df["dt"].copy())

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,15 +209,15 @@ def test_combine_first_align_nan(self):
209209
)
210210
tm.assert_frame_equal(res, exp)
211211
assert res["a"].dtype == "datetime64[ns]"
212-
# ToDo: this must be int64
212+
# TODO: this must be int64
213213
assert res["b"].dtype == "int64"
214214

215215
res = dfa.iloc[:0].combine_first(dfb)
216216
exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"])
217217
tm.assert_frame_equal(res, exp)
218-
# ToDo: this must be datetime64
218+
# TODO: this must be datetime64
219219
assert res["a"].dtype == "float64"
220-
# ToDo: this must be int64
220+
# TODO: this must be int64
221221
assert res["b"].dtype == "int64"
222222

223223
def test_combine_first_timezone(self):

pandas/tests/frame/methods/test_replace.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
from datetime import datetime
4-
from io import StringIO
54
import re
65

76
import numpy as np
@@ -912,12 +911,14 @@ def test_replace_dict_tuple_list_ordering_remains_the_same(self):
912911
tm.assert_frame_equal(res3, expected)
913912

914913
def test_replace_doesnt_replace_without_regex(self):
915-
raw = """fol T_opp T_Dir T_Enh
916-
0 1 0 0 vo
917-
1 2 vr 0 0
918-
2 2 0 0 0
919-
3 3 0 bt 0"""
920-
df = pd.read_csv(StringIO(raw), sep=r"\s+")
914+
df = DataFrame(
915+
{
916+
"fol": [1, 2, 2, 3],
917+
"T_opp": ["0", "vr", "0", "0"],
918+
"T_Dir": ["0", "0", "0", "bt"],
919+
"T_Enh": ["vo", "0", "0", "0"],
920+
}
921+
)
921922
res = df.replace({r"\D": 1})
922923
tm.assert_frame_equal(df, res)
923924

pandas/tests/frame/test_block_internals.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
datetime,
33
timedelta,
44
)
5-
from io import StringIO
65
import itertools
76

87
import numpy as np
@@ -289,15 +288,29 @@ def test_pickle(self, float_string_frame, timezone_frame):
289288
def test_consolidate_datetime64(self):
290289
# numpy vstack bug
291290

292-
data = (
293-
"starting,ending,measure\n"
294-
"2012-06-21 00:00,2012-06-23 07:00,77\n"
295-
"2012-06-23 07:00,2012-06-23 16:30,65\n"
296-
"2012-06-23 16:30,2012-06-25 08:00,77\n"
297-
"2012-06-25 08:00,2012-06-26 12:00,0\n"
298-
"2012-06-26 12:00,2012-06-27 08:00,77\n"
291+
df = DataFrame(
292+
{
293+
"starting": pd.to_datetime(
294+
[
295+
"2012-06-21 00:00",
296+
"2012-06-23 07:00",
297+
"2012-06-23 16:30",
298+
"2012-06-25 08:00",
299+
"2012-06-26 12:00",
300+
]
301+
),
302+
"ending": pd.to_datetime(
303+
[
304+
"2012-06-23 07:00",
305+
"2012-06-23 16:30",
306+
"2012-06-25 08:00",
307+
"2012-06-26 12:00",
308+
"2012-06-27 08:00",
309+
]
310+
),
311+
"measure": [77, 65, 77, 0, 77],
312+
}
299313
)
300-
df = pd.read_csv(StringIO(data), parse_dates=[0, 1])
301314

302315
ser_starting = df.starting
303316
ser_starting.index = ser_starting.values

pandas/tests/frame/test_query_eval.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from io import StringIO
21
import operator
32

43
import numpy as np
@@ -1020,23 +1019,19 @@ def test_object_array_eq_ne(self, parser, engine):
10201019

10211020
def test_query_with_nested_strings(self, parser, engine):
10221021
skip_if_no_pandas_parser(parser)
1023-
raw = """id event timestamp
1024-
1 "page 1 load" 1/1/2014 0:00:01
1025-
1 "page 1 exit" 1/1/2014 0:00:31
1026-
2 "page 2 load" 1/1/2014 0:01:01
1027-
2 "page 2 exit" 1/1/2014 0:01:31
1028-
3 "page 3 load" 1/1/2014 0:02:01
1029-
3 "page 3 exit" 1/1/2014 0:02:31
1030-
4 "page 1 load" 2/1/2014 1:00:01
1031-
4 "page 1 exit" 2/1/2014 1:00:31
1032-
5 "page 2 load" 2/1/2014 1:01:01
1033-
5 "page 2 exit" 2/1/2014 1:01:31
1034-
6 "page 3 load" 2/1/2014 1:02:01
1035-
6 "page 3 exit" 2/1/2014 1:02:31
1036-
"""
1037-
df = pd.read_csv(
1038-
StringIO(raw), sep=r"\s{2,}", engine="python", parse_dates=["timestamp"]
1022+
events = [
1023+
f"page {n} {act}" for n in range(1, 4) for act in ["load", "exit"]
1024+
] * 2
1025+
stamps1 = date_range("2014-01-01 0:00:01", freq="30s", periods=6)
1026+
stamps2 = date_range("2014-02-01 1:00:01", freq="30s", periods=6)
1027+
df = DataFrame(
1028+
{
1029+
"id": np.arange(1, 7).repeat(2),
1030+
"event": events,
1031+
"timestamp": stamps1.append(stamps2),
1032+
}
10391033
)
1034+
10401035
expected = df[df.event == '"page 1 load"']
10411036
res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine)
10421037
tm.assert_frame_equal(expected, res)

pandas/tests/groupby/test_groupby.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from datetime import datetime
22
from decimal import Decimal
3-
from io import StringIO
43

54
import numpy as np
65
import pytest
@@ -20,7 +19,6 @@
2019
Timedelta,
2120
Timestamp,
2221
date_range,
23-
read_csv,
2422
to_datetime,
2523
)
2624
import pandas._testing as tm
@@ -1132,14 +1130,18 @@ def test_grouping_ndarray(df):
11321130

11331131

11341132
def test_groupby_wrong_multi_labels():
1135-
data = """index,foo,bar,baz,spam,data
1136-
0,foo1,bar1,baz1,spam2,20
1137-
1,foo1,bar2,baz1,spam3,30
1138-
2,foo2,bar2,baz1,spam2,40
1139-
3,foo1,bar1,baz2,spam1,50
1140-
4,foo3,bar1,baz2,spam1,60"""
1141-
1142-
data = read_csv(StringIO(data), index_col=0)
1133+
1134+
index = Index([0, 1, 2, 3, 4], name="index")
1135+
data = DataFrame(
1136+
{
1137+
"foo": ["foo1", "foo1", "foo2", "foo1", "foo3"],
1138+
"bar": ["bar1", "bar2", "bar2", "bar1", "bar1"],
1139+
"baz": ["baz1", "baz1", "baz1", "baz2", "baz2"],
1140+
"spam": ["spam2", "spam3", "spam2", "spam1", "spam1"],
1141+
"data": [20, 30, 40, 50, 60],
1142+
},
1143+
index=index,
1144+
)
11431145

11441146
grouped = data.groupby(["foo", "bar", "baz", "spam"])
11451147

pandas/tests/indexing/test_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def test_dups_fancy_indexing_only_missing_label(self):
277277
):
278278
dfnu.loc[["E"]]
279279

280-
# ToDo: check_index_type can be True after GH 11497
280+
# TODO: check_index_type can be True after GH 11497
281281

282282
@pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
283283
def test_dups_fancy_indexing_missing_label(self, vals):

pandas/tests/indexing/test_loc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1394,7 +1394,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
13941394
tz = tz_naive_fixture
13951395
idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz)
13961396
expected = DataFrame(1.2, index=idx, columns=["var"])
1397-
# if result started off with object dtype, tehn the .loc.__setitem__
1397+
# if result started off with object dtype, then the .loc.__setitem__
13981398
# below would retain object dtype
13991399
result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
14001400
result.loc[:, idxer] = expected

0 commit comments

Comments
 (0)