Skip to content

Commit 212824c

Browse files
committed
restore all
1 parent 1c805aa commit 212824c

File tree

11 files changed

+4937
-0
lines changed

11 files changed

+4937
-0
lines changed

pandas/tests/io/test_clipboard.py

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
from textwrap import dedent
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import (
7+
DataFrame,
8+
get_option,
9+
read_clipboard,
10+
)
11+
import pandas._testing as tm
12+
13+
from pandas.io.clipboard import (
14+
clipboard_get,
15+
clipboard_set,
16+
)
17+
18+
19+
def build_kwargs(sep, excel):
    """Collect the ``to_clipboard`` keyword arguments that were explicitly set.

    The string ``"default"`` is the sentinel meaning "do not pass this
    argument at all"; any other value (including ``None``) is forwarded.

    Parameters
    ----------
    sep : object
        Value for the ``sep`` keyword, or ``"default"`` to omit it.
    excel : object
        Value for the ``excel`` keyword, or ``"default"`` to omit it.

    Returns
    -------
    dict
        Mapping with an ``"excel"`` and/or ``"sep"`` entry (in that order)
        for each argument that is not ``"default"``.
    """
    candidates = (("excel", excel), ("sep", sep))
    return {name: value for name, value in candidates if value != "default"}
26+
27+
28+
@pytest.fixture(
    params=[
        "delims",
        "utf8",
        "utf16",
        "string",
        "long",
        "nonascii",
        "colwidth",
        "mixed",
        "float",
        "int",
    ]
)
def df(request):
    """Parametrized fixture returning one sample DataFrame per param.

    ``request.param`` names the kind of frame to build; an unrecognised
    name raises ``ValueError``.
    """
    kind = request.param

    # Index/column settings shared by every ``tm.makeCustomDataframe`` call.
    axes = {
        "c_idx_type": "s",
        "r_idx_type": "i",
        "c_idx_names": [None],
        "r_idx_names": [None],
    }

    def random_bit(*args):
        # Cell generator used by the "long" and "int" frames.
        return np.random.randint(2)

    if kind == "delims":
        # Cells containing quotes, commas, tabs and pipes.
        return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]})

    if kind == "utf8":
        return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]})

    if kind == "utf16":
        thumbs = "\U0001f44d\U0001f44d"
        return DataFrame({"a": [thumbs, thumbs], "b": ["abc", "def"]})

    if kind == "string":
        return tm.makeCustomDataframe(5, 3, **axes)

    if kind == "long":
        # One row more than the ``display.max_rows`` option.
        n_rows = get_option("display.max_rows") + 1
        return tm.makeCustomDataframe(n_rows, 3, data_gen_f=random_bit, **axes)

    if kind == "nonascii":
        return DataFrame({"en": ["in", "English"], "es": ["en", "español"]})

    if kind == "colwidth":
        # Every cell is one character wider than ``display.max_colwidth``.
        cell = "x" * (get_option("display.max_colwidth") + 1)
        return tm.makeCustomDataframe(5, 3, data_gen_f=lambda *args: cell, **axes)

    if kind == "mixed":
        floats = np.arange(1.0, 6.0) + 0.01
        ints = np.arange(1, 6).astype(np.int64)
        letters = ["a", "b", "c", "d", "e"]
        return DataFrame({"a": floats, "b": ints, "c": letters})

    if kind == "float":
        return tm.makeCustomDataframe(
            5, 3, data_gen_f=lambda r, c: float(r) + 0.01, **axes
        )

    if kind == "int":
        return tm.makeCustomDataframe(5, 3, data_gen_f=random_bit, **axes)

    raise ValueError
111+
112+
113+
@pytest.fixture
def mock_clipboard(monkeypatch, request):
    """Replace the pandas clipboard functions with an in-memory stand-in.

    ``pandas.io.clipboard.clipboard_set`` and
    ``pandas.io.clipboard.clipboard_get`` are monkeypatched to write to and
    read from a dict local to this fixture. Entries are keyed by the test
    ID, i.e. ``request.node.name``.

    Yields
    ------
    dict
        The backing dict, so tests can inspect or seed it directly.
    """
    # the fake clipboard contents, one entry per test ID
    store = {}

    def fake_set(data):
        store[request.node.name] = data

    def fake_get():
        return store[request.node.name]

    replacements = (
        ("pandas.io.clipboard.clipboard_set", fake_set),
        ("pandas.io.clipboard.clipboard_get", fake_get),
    )
    for target, stub in replacements:
        monkeypatch.setattr(target, stub)

    yield store
139+
140+
141+
@pytest.mark.clipboard
def test_mock_clipboard(mock_clipboard):
    """Check that clipboard writes and reads go through ``mock_clipboard``."""
    # Access the functions through the module so the attributes patched by
    # the fixture are the ones looked up at call time.
    import pandas.io.clipboard as clipboard_module

    payload = "abc"
    clipboard_module.clipboard_set(payload)
    # The written value must show up in the dict yielded by the fixture ...
    assert payload in mock_clipboard.values()
    # ... and reading must return that same value.
    fetched = clipboard_module.clipboard_get()
    assert fetched == payload
149+
150+
151+
@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.usefixtures("mock_clipboard")
class TestClipboard:
    """Tests for ``DataFrame.to_clipboard`` and ``read_clipboard``.

    The ``mock_clipboard`` fixture is applied to every test in the class
    through the ``usefixtures`` mark.
    """

    def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None):
        """Copy ``data`` to the clipboard, read it back and compare the frames.

        Parameters
        ----------
        data : DataFrame
            Frame to copy.
        excel, sep, encoding : optional
            Forwarded to ``to_clipboard``. ``sep`` (falling back to a tab when
            falsy) and ``encoding`` are also forwarded to ``read_clipboard``,
            which is called with ``index_col=0``.
        """
        data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
        result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
        tm.assert_frame_equal(data, result)

    # Test that default arguments copy as tab delimited
    def test_round_trip_frame(self, df):
        self.check_round_trip_frame(df)

    # Test that explicit delimiters are respected
    @pytest.mark.parametrize("sep", ["\t", ",", "|"])
    def test_round_trip_frame_sep(self, df, sep):
        self.check_round_trip_frame(df, sep=sep)

    # Test white space separator
    def test_round_trip_frame_string(self, df):
        df.to_clipboard(excel=False, sep=None)
        result = read_clipboard()
        # Compared through the string rendering and shape rather than with
        # ``assert_frame_equal``.
        assert df.to_string() == result.to_string()
        assert df.shape == result.shape

    # Two character separator is not supported in to_clipboard
    # Test that multi-character separators are not silently passed
    def test_excel_sep_warning(self, df):
        # ``r"\t"`` is the two characters backslash + "t", not a tab.
        with tm.assert_produces_warning():
            df.to_clipboard(excel=True, sep=r"\t")

    # Separator is ignored when excel=False and should produce a warning
    def test_copy_delim_warning(self, df):
        with tm.assert_produces_warning():
            df.to_clipboard(excel=False, sep="\t")

    # Tests that the default behavior of to_clipboard is tab
    # delimited and excel="True"
    @pytest.mark.parametrize("sep", ["\t", None, "default"])
    @pytest.mark.parametrize("excel", [True, None, "default"])
    def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard):
        # "default" means the keyword is omitted entirely (see build_kwargs).
        kwargs = build_kwargs(sep, excel)
        df.to_clipboard(**kwargs)
        assert mock_clipboard[request.node.name] == df.to_csv(sep="\t")

    # Tests reading of white space separated tables
    @pytest.mark.parametrize("sep", [None, "default"])
    @pytest.mark.parametrize("excel", [False])
    def test_clipboard_copy_strings(self, sep, excel, df):
        kwargs = build_kwargs(sep, excel)
        df.to_clipboard(**kwargs)
        result = read_clipboard(sep=r"\s+")
        assert result.to_string() == df.to_string()
        assert df.shape == result.shape

    def test_read_clipboard_infer_excel(self, request, mock_clipboard):
        # gh-19010: avoid warnings
        clip_kwargs = {"engine": "python"}

        # Tabs are written as explicit ``\t`` escapes and the lines are joined
        # by hand: a literal tab inside a triple-quoted string cannot be told
        # apart from spaces, and ``dedent`` on an already ``strip()``-ed
        # string leaves the indentation of every line but the first in place.
        text = "\n".join(
            [
                "John James\tCharlie Mingus",
                "1\t2",
                "4\tHarry Carney",
            ]
        )
        mock_clipboard[request.node.name] = text
        df = read_clipboard(**clip_kwargs)

        # excel data is parsed correctly
        # (single positional lookup instead of chained ``iloc[1][1]``)
        assert df.iloc[1, 1] == "Harry Carney"

        # having diff tab counts doesn't trigger it
        text = "\n".join(["a\t b", "1 2", "3 4"])
        mock_clipboard[request.node.name] = text
        res = read_clipboard(**clip_kwargs)

        # Same table without any tab; both must parse to the same frame.
        text = "\n".join(["a b", "1 2", "3 4"])
        mock_clipboard[request.node.name] = text
        exp = read_clipboard(**clip_kwargs)

        tm.assert_frame_equal(res, exp)

    def test_infer_excel_with_nulls(self, request, mock_clipboard):
        # GH41108
        # The third line starts with a tab, i.e. its first cell is empty.
        text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen"

        mock_clipboard[request.node.name] = text
        df = read_clipboard()
        df_expected = DataFrame(
            data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
        )

        # excel data is parsed correctly
        tm.assert_frame_equal(df, df_expected)

    @pytest.mark.parametrize(
        "multiindex",
        [
            (  # Can't use `dedent` here as it will remove the leading `\t`
                "\n".join(
                    [
                        "\t\t\tcol1\tcol2",
                        "A\t0\tTrue\t1\tred",
                        "A\t1\tTrue\t\tblue",
                        "B\t0\tFalse\t2\tgreen",
                    ]
                ),
                [["A", "A", "B"], [0, 1, 0], [True, True, False]],
            ),
            (
                "\n".join(
                    ["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"]
                ),
                [["A", "A", "B"], [0, 1, 0]],
            ),
        ],
    )
    def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex):
        # GH41108
        # ``multiindex`` is a (clipboard text, expected index arrays) pair.

        mock_clipboard[request.node.name] = multiindex[0]
        df = read_clipboard()
        df_expected = DataFrame(
            data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
            index=multiindex[1],
        )

        # excel data is parsed correctly
        tm.assert_frame_equal(df, df_expected)

    def test_invalid_encoding(self, df):
        msg = "clipboard only supports utf-8 encoding"
        # test case for testing invalid encoding
        # Writing is expected to raise ValueError, reading NotImplementedError,
        # both with the same message.
        with pytest.raises(ValueError, match=msg):
            df.to_clipboard(encoding="ascii")
        with pytest.raises(NotImplementedError, match=msg):
            read_clipboard(encoding="ascii")

    @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"])
    def test_round_trip_valid_encodings(self, enc, df):
        self.check_round_trip_frame(df, encoding=enc)
305+
306+
307+
@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
def test_raw_roundtrip(data):
    """Text passed to ``clipboard_set`` comes back equal from ``clipboard_get``."""
    # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
    clipboard_set(data)
    pasted = clipboard_get()
    assert data == pasted

0 commit comments

Comments
 (0)