Skip to content

Commit 44b73a4

Browse files
committed
CLN: Remove unwanted patterns
1 parent f5d2e67 commit 44b73a4

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

pandas/io/stata.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,18 +1775,21 @@ def _do_convert_categoricals(
17751775
if label in value_labels:
17761776
# Explicit call with ordered=True
17771777
vl = value_label_dict[label]
1778-
keys = np.array([k for k in vl.keys()])
1778+
keys = np.array(list(vl.keys()))
17791779
column = data[col]
1780-
if self._chunksize is not None and column.isin(keys).all():
1780+
key_matches = column.isin(keys)
1781+
if self._chunksize is not None and key_matches.all():
1782+
initial_categories = keys
17811783
# If all categories are in the keys and we are iterating,
17821784
# use the same keys for all chunks. If some are missing
17831785
# value labels, then we will fall back to the categories
17841786
# varying across chunks.
1785-
initial_categories = keys
1786-
warnings.warn(
1787-
categorical_conversion_warning, CategoricalConversionWarning
1788-
)
17891787
else:
1788+
if self._chunksize is not None:
1789+
# warn if using an iterator
1790+
warnings.warn(
1791+
categorical_conversion_warning, CategoricalConversionWarning
1792+
)
17901793
initial_categories = None
17911794
cat_data = Categorical(
17921795
column, categories=initial_categories, ordered=order_categoricals
@@ -1801,7 +1804,7 @@ def _do_convert_categoricals(
18011804
categories.append(category)
18021805
else:
18031806
# If all cats are matched, we can use the values
1804-
categories = [v for v in vl.values()]
1807+
categories = list(vl.values())
18051808
try:
18061809
# Try to catch duplicate categories
18071810
cat_data.categories = categories

pandas/tests/io/test_stata.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,18 +1881,19 @@ def test_chunked_categorical(version):
18811881

18821882
def test_chunked_categorical_partial(dirpath):
18831883
dta_file = os.path.join(dirpath, "stata-dta-partially-labeled.dta")
1884-
reader = StataReader(dta_file, chunksize=2)
18851884
values = ["a", "b", "a", "b", 3.0]
1886-
with pytest.warns(CategoricalConversionWarning, match="One or more series"):
1887-
for i, block in enumerate(reader):
1888-
assert list(block.cats) == values[2 * i : 2 * (i + 1)]
1889-
if i < 2:
1890-
idx = pd.Index(["a", "b"])
1891-
else:
1892-
idx = pd.Float64Index([3.0])
1893-
tm.assert_index_equal(block.cats.cat.categories, idx)
1894-
reader = StataReader(dta_file, chunksize=5)
1895-
large_chunk = reader.__next__()
1885+
with StataReader(dta_file, chunksize=2) as reader:
1886+
with tm.assert_produces_warning(CategoricalConversionWarning):
1887+
for i, block in enumerate(reader):
1888+
assert list(block.cats) == values[2 * i : 2 * (i + 1)]
1889+
if i < 2:
1890+
idx = pd.Index(["a", "b"])
1891+
else:
1892+
idx = pd.Float64Index([3.0])
1893+
tm.assert_index_equal(block.cats.cat.categories, idx)
1894+
with tm.assert_produces_warning(CategoricalConversionWarning):
1895+
with StataReader(dta_file, chunksize=5) as reader:
1896+
large_chunk = reader.__next__()
18961897
direct = read_stata(dta_file)
18971898
tm.assert_frame_equal(direct, large_chunk)
18981899

@@ -1906,4 +1907,5 @@ def test_iterator_errors(dirpath):
19061907
with pytest.raises(ValueError, match="chunksize must be a positive"):
19071908
StataReader(dta_file, chunksize="apple")
19081909
with pytest.raises(ValueError, match="chunksize must be set to a positive"):
1909-
StataReader(dta_file).__next__()
1910+
with StataReader(dta_file) as reader:
1911+
reader.__next__()

0 commit comments

Comments
 (0)