-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TST: Testing for mixed int/str Index #61349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 65 commits
29039f9
65de448
0816a26
b87036f
946f99b
a671eb7
416a6ae
2e63667
5550b1d
33e2a34
d7b534e
642734e
dea15de
03c3b0a
5d1c154
c10c263
edb84e4
af140a8
8992100
1fe92f9
599df6d
3256953
bf05b29
c856799
ed90c56
e3f1eb2
a784a90
eb2f210
710e4d5
079aeb1
545f04c
a16f5b3
413dad1
a6b958b
0c0ef09
d3a2378
355a058
a2d5fbf
ec189e4
771c098
25ba609
acd31b1
b522022
64bf3fe
4c4e673
96c26a3
dfc6a4a
01a93c8
c5215e2
31e4730
b9b6ba4
d661599
ec5a628
f959d03
88e1b81
7b12c91
6a764d7
03b5a69
2856da0
ae6dd39
ed182bf
ed0feac
dc53d28
9418256
867dfab
a61324a
0d6ea71
105352a
2ad71a5
acfec32
e596946
1466428
f1d91b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
xaris96 marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pytest | ||
|
||
import pandas as pd | ||
|
||
|
||
def test_mixed_int_string_index(): | ||
idx = pd.Index([0, "a", 1, "b", 2, "c"]) | ||
|
||
# Check the length and positional access (second and last elements) | ||
assert len(idx) == 6 | ||
assert idx[1] == "a" | ||
assert idx[-1] == "c" | ||
|
||
# Sorting the mixed int/str values is expected to raise TypeError | ||
with pytest.raises(TypeError): | ||
idx.sort_values() | ||
|
||
# Check that all values in the index are unique | ||
assert idx.is_unique | ||
|
||
# Label lookup: an existing label returns its position, a missing one raises KeyError | ||
assert idx.get_loc("a") == 1 | ||
with pytest.raises(KeyError): | ||
idx.get_loc("z") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,6 +155,14 @@ def test_numpy_ufuncs_reductions(index, func, request): | |
# TODO: overlap with tests.series.test_ufunc.test_reductions | ||
if len(index) == 0: | ||
pytest.skip("Test doesn't make sense for empty index.") | ||
has_str = any(isinstance(x, str) for x in index) | ||
has_int = any(isinstance(x, int) for x in index) | ||
if has_str and has_int: | ||
Comment on lines
+158
to
+160
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please check |
||
request.applymarker( | ||
pytest.mark.xfail( | ||
reason="Cannot compare mixed types (int and str) in ufunc reductions" | ||
) | ||
) | ||
|
||
if isinstance(index, CategoricalIndex) and index.dtype.ordered is False: | ||
with pytest.raises(TypeError, match="is not ordered for"): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,40 +63,23 @@ def index_flat2(index_flat): | |
|
||
|
||
def test_union_same_types(index): | ||
# Union with a non-unique, non-monotonic index raises error | ||
# Only needed for bool index factory | ||
# Mixed int/str fixture: cast to str so the sort_values() calls below can be used | ||
if index.equals(Index([0, "a", 1, "b", 2, "c"])): | ||
index = index.astype(str) | ||
|
||
idx1 = index.sort_values() | ||
idx2 = index.sort_values() | ||
assert idx1.union(idx2).dtype == idx1.dtype | ||
assert idx1.union(idx2, sort=False).dtype == idx1.dtype | ||
|
||
|
||
def test_union_different_types(index_flat, index_flat2, request): | ||
# This test only considers combinations of indices | ||
# GH 23525 | ||
idx1 = index_flat | ||
idx2 = index_flat2 | ||
|
||
if ( | ||
not idx1.is_unique | ||
and not idx2.is_unique | ||
and idx1.dtype.kind == "i" | ||
and idx2.dtype.kind == "b" | ||
) or ( | ||
not idx2.is_unique | ||
and not idx1.is_unique | ||
and idx2.dtype.kind == "i" | ||
and idx1.dtype.kind == "b" | ||
): | ||
# Each condition had idx[1|2].is_monotonic_decreasing | ||
# but failed when e.g. | ||
# idx1 = Index( | ||
# [True, True, True, True, True, True, True, True, False, False], dtype='bool' | ||
# ) | ||
# idx2 = Index([0, 0, 1, 1, 2, 2], dtype='int64') | ||
mark = pytest.mark.xfail( | ||
reason="GH#44000 True==1", raises=ValueError, strict=False | ||
) | ||
request.applymarker(mark) | ||
# mixed int string | ||
target_index = Index([0, "a", 1, "b", 2, "c"]) | ||
if idx1.equals(target_index) or idx2.equals(target_index): | ||
idx1 = idx1.astype(str) | ||
idx2 = idx2.astype(str) | ||
|
||
common_dtype = find_common_type([idx1.dtype, idx2.dtype]) | ||
|
||
|
@@ -107,7 +90,6 @@ def test_union_different_types(index_flat, index_flat2, request): | |
elif (idx1.dtype.kind == "c" and (not lib.is_np_dtype(idx2.dtype, "iufc"))) or ( | ||
idx2.dtype.kind == "c" and (not lib.is_np_dtype(idx1.dtype, "iufc")) | ||
): | ||
# complex objects non-sortable | ||
warn = RuntimeWarning | ||
elif ( | ||
isinstance(idx1.dtype, PeriodDtype) and isinstance(idx2.dtype, CategoricalDtype) | ||
|
@@ -129,12 +111,17 @@ def test_union_different_types(index_flat, index_flat2, request): | |
|
||
# Union with a non-unique, non-monotonic index raises error | ||
# This applies to the boolean index | ||
idx1 = idx1.sort_values() | ||
idx2 = idx2.sort_values() | ||
try: | ||
idx1.sort_values() | ||
idx2.sort_values() | ||
except TypeError: | ||
result = idx1.union(idx2, sort=False) | ||
assert result.dtype == "object" | ||
return | ||
|
||
with tm.assert_produces_warning(warn, match=msg): | ||
res1 = idx1.union(idx2) | ||
res2 = idx2.union(idx1) | ||
res1 = idx1.union(idx2, sort=False) | ||
res2 = idx2.union(idx1, sort=False) | ||
|
||
if any_uint64 and (idx1_signed or idx2_signed): | ||
assert res1.dtype == np.dtype("O") | ||
|
@@ -223,7 +210,7 @@ def test_set_ops_error_cases(self, case, method, index): | |
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||
def test_intersection_base(self, index): | ||
if isinstance(index, CategoricalIndex): | ||
pytest.skip(f"Not relevant for {type(index).__name__}") | ||
pytest.mark.xfail(reason="Not relevant for CategoricalIndex") | ||
|
||
first = index[:5].unique() | ||
second = index[:3].unique() | ||
|
@@ -248,12 +235,21 @@ def test_intersection_base(self, index): | |
|
||
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||
def test_union_base(self, index): | ||
if index.inferred_type in ["mixed", "mixed-integer"]: | ||
pytest.mark.xfail(reason="Not relevant for mixed types") | ||
|
||
index = index.unique() | ||
|
||
# Mixed int string | ||
if index.equals(Index([0, "a", 1, "b", 2, "c"])): | ||
index = index.astype(str) | ||
|
||
first = index[3:] | ||
second = index[:5] | ||
everything = index | ||
|
||
union = first.union(second) | ||
# Default sort=None | ||
union = first.union(second, sort=None) | ||
tm.assert_index_equal(union.sort_values(), everything.sort_values()) | ||
|
||
if isinstance(index.dtype, DatetimeTZDtype): | ||
|
@@ -264,7 +260,7 @@ def test_union_base(self, index): | |
# GH#10149 | ||
cases = [second.to_numpy(), second.to_series(), second.to_list()] | ||
for case in cases: | ||
result = first.union(case) | ||
result = first.union(case, sort=None) | ||
assert equal_contents(result, everything) | ||
|
||
if isinstance(index, MultiIndex): | ||
|
@@ -314,7 +310,8 @@ def test_symmetric_difference(self, index, using_infer_string, request): | |
# index fixture has e.g. an index of bools that does not satisfy this, | ||
# another with [0, 0, 1, 1, 2, 2] | ||
pytest.skip("Index values no not satisfy test condition.") | ||
|
||
if index.equals(Index([0, "a", 1, "b", 2, "c"])): | ||
index = index.astype(str) | ||
first = index[1:] | ||
second = index[:-1] | ||
answer = index[[0, -1]] | ||
|
@@ -395,6 +392,9 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): | |
else: | ||
index = index_flat | ||
|
||
if index.dtype == "object": | ||
index = index.astype(str) | ||
|
||
# test copy.union(subset) - need sort for unicode and string | ||
first = index.copy().set_names(fname) | ||
second = index[1:].set_names(sname) | ||
|
@@ -464,6 +464,8 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): | |
else: | ||
index = index_flat | ||
|
||
if index.dtype == "object": | ||
index = index.astype(str) | ||
# test copy.intersection(subset) - need sort for unicode and string | ||
first = index.copy().set_names(fname) | ||
second = index[1:].set_names(sname) | ||
|
@@ -915,6 +917,19 @@ def test_difference_incomparable_true(self, opname): | |
def test_symmetric_difference_mi(self, sort): | ||
index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) | ||
index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) | ||
|
||
def has_mixed_types(level): | ||
return any(isinstance(x, str) for x in level) and any( | ||
isinstance(x, int) for x in level | ||
) | ||
|
||
for idx in [index1, index2]: | ||
for lvl in range(idx.nlevels): | ||
if has_mixed_types(idx.get_level_values(lvl)): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please check |
||
pytest.skip( | ||
f"Mixed types in MultiIndex level {lvl} are not orderable" | ||
) | ||
Comment on lines
+939
to
+941
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add an |
||
|
||
result = index1.symmetric_difference(index2, sort=sort) | ||
expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) | ||
if sort is None: | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -63,22 +63,31 @@ def test_factorize_complex(self): | |||||||||
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex) | ||||||||||
tm.assert_numpy_array_equal(uniques, expected_uniques) | ||||||||||
|
||||||||||
@pytest.mark.parametrize( | ||||||||||
"index_or_series_obj", [[1, 2, 3], ["a", "b", "c"], [0, "a", 1, "b", 2, "c"]] | ||||||||||
) | ||||||||||
@pytest.mark.parametrize("sort", [True, False]) | ||||||||||
Comment on lines
+66
to
+69
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
This overrides the original fixtures being used here |
||||||||||
def test_factorize(self, index_or_series_obj, sort): | ||||||||||
obj = index_or_series_obj | ||||||||||
obj = Index(index_or_series_obj) | ||||||||||
|
||||||||||
if obj.empty: | ||||||||||
pytest.skip("Skipping test for empty Index") | ||||||||||
|
||||||||||
if obj.name == "mixed-int-string" or obj.name is None: | ||||||||||
pytest.skip( | ||||||||||
"Skipping test for mixed-int-string due " | ||||||||||
"to unsupported comparison between str and int" | ||||||||||
) | ||||||||||
|
||||||||||
result_codes, result_uniques = obj.factorize(sort=sort) | ||||||||||
|
||||||||||
constructor = Index | ||||||||||
if isinstance(obj, MultiIndex): | ||||||||||
constructor = MultiIndex.from_tuples | ||||||||||
expected_arr = obj.unique() | ||||||||||
if expected_arr.dtype == np.float16: | ||||||||||
expected_arr = expected_arr.astype(np.float32) | ||||||||||
expected_uniques = constructor(expected_arr) | ||||||||||
if ( | ||||||||||
isinstance(obj, Index) | ||||||||||
and expected_uniques.dtype == bool | ||||||||||
and obj.dtype == object | ||||||||||
): | ||||||||||
|
||||||||||
if expected_uniques.dtype == bool and obj.dtype == object: | ||||||||||
expected_uniques = expected_uniques.astype(object) | ||||||||||
|
||||||||||
if sort: | ||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.