Skip to content

Commit ad841bf

Browse files
Merge remote-tracking branch 'upstream/main' into test_numpy_complex2
2 parents d25baa2 + 9aa3f95 commit ad841bf

File tree

12 files changed

+162
-82
lines changed

12 files changed

+162
-82
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ MultiIndex
227227

228228
I/O
229229
^^^
230+
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
230231
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
231232

232233
Period

pandas/_libs/parsers.pyx

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ from csv import (
66
QUOTE_NONE,
77
QUOTE_NONNUMERIC,
88
)
9-
import sys
109
import time
1110
import warnings
1211

@@ -880,9 +879,15 @@ cdef class TextReader:
880879

881880
cdef _check_tokenize_status(self, int status):
882881
if self.parser.warn_msg != NULL:
883-
print(PyUnicode_DecodeUTF8(
884-
self.parser.warn_msg, strlen(self.parser.warn_msg),
885-
self.encoding_errors), file=sys.stderr)
882+
warnings.warn(
883+
PyUnicode_DecodeUTF8(
884+
self.parser.warn_msg,
885+
strlen(self.parser.warn_msg),
886+
self.encoding_errors
887+
),
888+
ParserWarning,
889+
stacklevel=find_stack_level()
890+
)
886891
free(self.parser.warn_msg)
887892
self.parser.warn_msg = NULL
888893

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
pa_version_under9p0,
3131
pa_version_under11p0,
3232
pa_version_under13p0,
33+
pa_version_under14p0,
3334
)
3435

3536
if TYPE_CHECKING:
@@ -186,6 +187,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
186187
"pa_version_under9p0",
187188
"pa_version_under11p0",
188189
"pa_version_under13p0",
190+
"pa_version_under14p0",
189191
"IS64",
190192
"ISMUSL",
191193
"PY310",

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
pa_version_under11p0 = _palv < Version("11.0.0")
1616
pa_version_under12p0 = _palv < Version("12.0.0")
1717
pa_version_under13p0 = _palv < Version("13.0.0")
18+
pa_version_under14p0 = _palv < Version("14.0.0")
1819
except ImportError:
1920
pa_version_under7p0 = True
2021
pa_version_under8p0 = True
@@ -23,3 +24,4 @@
2324
pa_version_under11p0 = True
2425
pa_version_under12p0 = True
2526
pa_version_under13p0 = True
27+
pa_version_under14p0 = True

pandas/core/internals/managers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,10 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
969969
n = len(self)
970970

971971
if isinstance(dtype, ExtensionDtype):
972+
# TODO: use object dtype as workaround for non-performant
973+
# EA.__setitem__ methods. (primarily ArrowExtensionArray.__setitem__
974+
# when iteratively setting individual values)
975+
# https://github.com/pandas-dev/pandas/pull/54508#issuecomment-1675827918
972976
result = np.empty(n, dtype=object)
973977
else:
974978
result = np.empty(n, dtype=dtype)

pandas/io/clipboard/__init__.py

Lines changed: 90 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717
On Windows, no additional modules are needed.
1818
On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli
1919
commands. (These commands should come with OS X.)
20-
On Linux, install xclip or xsel via package manager. For example, in Debian:
20+
On Linux, install xclip, xsel, or wl-clipboard (for "wayland" sessions) via
21+
package manager.
22+
For example, in Debian:
2123
sudo apt-get install xclip
2224
sudo apt-get install xsel
25+
sudo apt-get install wl-clipboard
2326
2427
Otherwise on Linux, you will need the PyQt5 modules installed.
2528
@@ -28,20 +31,19 @@
2831
Cygwin is currently not supported.
2932
3033
Security Note: This module runs programs with these names:
31-
- which
32-
- where
3334
- pbcopy
3435
- pbpaste
3536
- xclip
3637
- xsel
38+
- wl-copy/wl-paste
3739
- klipper
3840
- qdbus
3941
A malicious user could rename or add programs with these names, tricking
4042
Pyperclip into running them with whatever permissions the Python process has.
4143
4244
"""
4345

44-
__version__ = "1.7.0"
46+
__version__ = "1.8.2"
4547

4648

4749
import contextlib
@@ -55,7 +57,7 @@
5557
)
5658
import os
5759
import platform
58-
from shutil import which
60+
from shutil import which as _executable_exists
5961
import subprocess
6062
import time
6163
import warnings
@@ -74,25 +76,14 @@
7476
EXCEPT_MSG = """
7577
Pyperclip could not find a copy/paste mechanism for your system.
7678
For more information, please visit
77-
https://pyperclip.readthedocs.io/en/latest/#not-implemented-error
79+
https://pyperclip.readthedocs.io/en/latest/index.html#not-implemented-error
7880
"""
7981

8082
ENCODING = "utf-8"
8183

82-
# The "which" unix command finds where a command is.
83-
if platform.system() == "Windows":
84-
WHICH_CMD = "where"
85-
else:
86-
WHICH_CMD = "which"
8784

88-
89-
def _executable_exists(name):
90-
return (
91-
subprocess.call(
92-
[WHICH_CMD, name], stdout=subprocess.PIPE, stderr=subprocess.PIPE
93-
)
94-
== 0
95-
)
85+
class PyperclipTimeoutException(PyperclipException):
86+
pass
9687

9788

9889
def _stringifyText(text) -> str:
@@ -229,6 +220,32 @@ def paste_xsel(primary=False):
229220
return copy_xsel, paste_xsel
230221

231222

223+
def init_wl_clipboard():
224+
PRIMARY_SELECTION = "-p"
225+
226+
def copy_wl(text, primary=False):
227+
text = _stringifyText(text) # Converts non-str values to str.
228+
args = ["wl-copy"]
229+
if primary:
230+
args.append(PRIMARY_SELECTION)
231+
if not text:
232+
args.append("--clear")
233+
subprocess.check_call(args, close_fds=True)
234+
else:
235+
p = subprocess.Popen(args, stdin=subprocess.PIPE, close_fds=True)
236+
p.communicate(input=text.encode(ENCODING))
237+
238+
def paste_wl(primary=False):
239+
args = ["wl-paste", "-n"]
240+
if primary:
241+
args.append(PRIMARY_SELECTION)
242+
p = subprocess.Popen(args, stdout=subprocess.PIPE, close_fds=True)
243+
stdout, _stderr = p.communicate()
244+
return stdout.decode(ENCODING)
245+
246+
return copy_wl, paste_wl
247+
248+
232249
def init_klipper_clipboard():
233250
def copy_klipper(text):
234251
text = _stringifyText(text) # Converts non-str values to str.
@@ -534,7 +551,7 @@ def determine_clipboard():
534551
return init_windows_clipboard()
535552

536553
if platform.system() == "Linux":
537-
if which("wslconfig.exe"):
554+
if _executable_exists("wslconfig.exe"):
538555
return init_wsl_clipboard()
539556

540557
# Setup for the macOS platform:
@@ -549,6 +566,8 @@ def determine_clipboard():
549566

550567
# Setup for the LINUX platform:
551568
if HAS_DISPLAY:
569+
if os.environ.get("WAYLAND_DISPLAY") and _executable_exists("wl-copy"):
570+
return init_wl_clipboard()
552571
if _executable_exists("xsel"):
553572
return init_xsel_clipboard()
554573
if _executable_exists("xclip"):
@@ -602,6 +621,7 @@ def set_clipboard(clipboard):
602621
"qt": init_qt_clipboard, # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5'
603622
"xclip": init_xclip_clipboard,
604623
"xsel": init_xsel_clipboard,
624+
"wl-clipboard": init_wl_clipboard,
605625
"klipper": init_klipper_clipboard,
606626
"windows": init_windows_clipboard,
607627
"no": init_no_clipboard,
@@ -671,7 +691,56 @@ def is_available() -> bool:
671691
copy, paste = lazy_load_stub_copy, lazy_load_stub_paste
672692

673693

674-
__all__ = ["copy", "paste", "set_clipboard", "determine_clipboard"]
694+
def waitForPaste(timeout=None):
695+
"""This function call blocks until a non-empty text string exists on the
696+
clipboard. It returns this text.
697+
698+
This function raises PyperclipTimeoutException if timeout was set to
699+
a number of seconds that has elapsed without non-empty text being put on
700+
the clipboard."""
701+
startTime = time.time()
702+
while True:
703+
clipboardText = paste()
704+
if clipboardText != "":
705+
return clipboardText
706+
time.sleep(0.01)
707+
708+
if timeout is not None and time.time() > startTime + timeout:
709+
raise PyperclipTimeoutException(
710+
"waitForPaste() timed out after " + str(timeout) + " seconds."
711+
)
712+
713+
714+
def waitForNewPaste(timeout=None):
715+
"""This function call blocks until a new text string exists on the
716+
clipboard that is different from the text that was there when the function
717+
was first called. It returns this text.
718+
719+
This function raises PyperclipTimeoutException if timeout was set to
720+
a number of seconds that has elapsed without different text being put on
721+
the clipboard."""
722+
startTime = time.time()
723+
originalText = paste()
724+
while True:
725+
currentText = paste()
726+
if currentText != originalText:
727+
return currentText
728+
time.sleep(0.01)
729+
730+
if timeout is not None and time.time() > startTime + timeout:
731+
raise PyperclipTimeoutException(
732+
"waitForNewPaste() timed out after " + str(timeout) + " seconds."
733+
)
734+
735+
736+
__all__ = [
737+
"copy",
738+
"paste",
739+
"waitForPaste",
740+
"waitForNewPaste",
741+
"set_clipboard",
742+
"determine_clipboard",
743+
]
675744

676745
# pandas aliases
677746
clipboard_get = paste

pandas/io/parsers/python_parser.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,25 @@
1313
import csv
1414
from io import StringIO
1515
import re
16-
import sys
1716
from typing import (
1817
IO,
1918
TYPE_CHECKING,
2019
DefaultDict,
2120
Literal,
2221
cast,
2322
)
23+
import warnings
2424

2525
import numpy as np
2626

2727
from pandas._libs import lib
2828
from pandas.errors import (
2929
EmptyDataError,
3030
ParserError,
31+
ParserWarning,
3132
)
3233
from pandas.util._decorators import cache_readonly
34+
from pandas.util._exceptions import find_stack_level
3335

3436
from pandas.core.dtypes.common import (
3537
is_bool_dtype,
@@ -778,8 +780,11 @@ def _alert_malformed(self, msg: str, row_num: int) -> None:
778780
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
779781
raise ParserError(msg)
780782
if self.on_bad_lines == self.BadLineHandleMethod.WARN:
781-
base = f"Skipping line {row_num}: "
782-
sys.stderr.write(base + msg + "\n")
783+
warnings.warn(
784+
f"Skipping line {row_num}: {msg}\n",
785+
ParserWarning,
786+
stacklevel=find_stack_level(),
787+
)
783788

784789
def _next_iter_line(self, row_num: int) -> list[Scalar] | None:
785790
"""

pandas/tests/extension/test_arrow.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
pa_version_under9p0,
4141
pa_version_under11p0,
4242
pa_version_under13p0,
43+
pa_version_under14p0,
4344
)
4445

4546
from pandas.core.dtypes.dtypes import (
@@ -927,7 +928,7 @@ def _is_temporal_supported(self, opname, pa_dtype):
927928
or (
928929
opname
929930
in ("__truediv__", "__rtruediv__", "__floordiv__", "__rfloordiv__")
930-
and not pa_version_under13p0
931+
and not pa_version_under14p0
931932
)
932933
)
933934
and pa.types.is_duration(pa_dtype)

0 commit comments

Comments
 (0)