From 8eb547ec0bd24b5b9773864e3dee76c1e47db286 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 10:12:40 -0700 Subject: [PATCH 1/6] CI: avoid file leaks in sas_xport tests --- pandas/tests/io/sas/test_xport.py | 7 +++++++ pandas/util/_test_decorators.py | 21 ++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 2682bafedb8f1..4dba16e88c437 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm @@ -30,6 +32,11 @@ def setup_method(self, datapath): self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") + with td.file_leak_context(): + yield + + self.file02b.close() + def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index bdf633839b2cd..e617fe426cfdf 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,6 +23,7 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ +from contextlib import contextmanager from distutils.version import LooseVersion from functools import wraps import locale @@ -237,7 +238,7 @@ def documented_fixture(fixture): def check_file_leaks(func) -> Callable: """ - Decorate a test function tot check that we are not leaking file descriptors. + Decorate a test function to check that we are not leaking file descriptors. """ psutil = safe_import("psutil") if not psutil: @@ -256,6 +257,24 @@ def new_func(*args, **kwargs): return new_func +@contextmanager +def file_leak_context(): + """ + ContextManager analogue to check_file_leaks. + """ + psutil = safe_import("psutil") + if not psutil: + yield + else: + proc = psutil.Process() + flist = proc.open_files() + + yield + + flist2 = proc.open_files() + assert flist2 == flist, (flist2, flist) + + def async_mark(): try: import_optional_dependency("pytest_asyncio") From bd953bf9dc3e5b9c44e5e96bab3582dff31d2d86 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 11:01:57 -0700 Subject: [PATCH 2/6] CLN: reuse context inside decorator --- pandas/util/_test_decorators.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index e617fe426cfdf..08c84528fcc87 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -25,7 +25,6 @@ def test_foo(): """ from contextlib import contextmanager from distutils.version import LooseVersion -from functools import wraps import locale from typing import Callable, Optional @@ -240,22 +239,9 @@ def check_file_leaks(func) -> Callable: """ Decorate a test function to check that we are not leaking file descriptors. """ - psutil = safe_import("psutil") - if not psutil: + with file_leak_context(): return func - @wraps(func) - def new_func(*args, **kwargs): - proc = psutil.Process() - flist = proc.open_files() - - func(*args, **kwargs) - - flist2 = proc.open_files() - assert flist2 == flist - - return new_func - @contextmanager def file_leak_context(): From c11444971a5ca2b4f1f237f568c663a47247068b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 14:50:21 -0700 Subject: [PATCH 3/6] TST: check for leaked socket connections --- pandas/util/_test_decorators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 08c84528fcc87..c56d0ad3e9472 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -254,12 +254,16 @@ def file_leak_context(): else: proc = psutil.Process() flist = proc.open_files() + conns = proc.connections() yield flist2 = proc.open_files() assert flist2 == flist, (flist2, flist) + conns2 = proc.connections() + assert conns2 == conns, (conns2, conns) + def async_mark(): try: From c2a362c89fb489e4af90423e8661d876044a4a00 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 16:13:52 -0700 Subject: [PATCH 4/6] open file later --- pandas/tests/io/sas/test_xport.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index 4dba16e88c437..ae2022e5cabbe 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -28,15 +28,12 @@ def setup_method(self, datapath): self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") - self.file02b = open(os.path.join(self.dirpath, "SSHSV1_A.xpt"), "rb") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") with td.file_leak_context(): yield - self.file02b.close() - def test1_basic(self): # Tests with DEMO_G.xpt (all numeric file) @@ -134,7 +131,8 @@ def test2_binary(self): data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) numeric_as_float(data_csv) - data = read_sas(self.file02b, format="xport") + with open(self.file02, "rb") as fd: + data = read_sas(fd, format="xport") tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From e75a15bfe9b3b0b458c127f3d80f4787559abc0f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Aug 2020 10:31:08 -0700 Subject: [PATCH 5/6] Fix incorrect file closing in read_sas --- pandas/io/sas/sasreader.py | 10 ++++++++-- pandas/tests/io/sas/test_xport.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 291c9d1ee7f0c..fffdebda8c87a 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -6,7 +6,7 @@ from pandas._typing import FilePathOrBuffer, Label -from pandas.io.common import stringify_path +from pandas.io.common import get_filepath_or_buffer, stringify_path if TYPE_CHECKING: from pandas import DataFrame # noqa: F401 @@ -109,6 +109,10 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") + filepath_or_buffer, _, _, should_close = get_filepath_or_buffer( + filepath_or_buffer, encoding + ) + reader: ReaderBase if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader @@ -129,5 +133,7 @@ def read_sas( return reader data = reader.read() - reader.close() + + if should_close: + reader.close() return data diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index ae2022e5cabbe..939edb3d8e0b4 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -132,7 +132,11 @@ def test2_binary(self): numeric_as_float(data_csv) with open(self.file02, "rb") as fd: - data = read_sas(fd, format="xport") + with td.file_leak_context(): + # GH#35693 ensure that if we pass an open file, we + # dont incorrectly close it in read_sas + data = read_sas(fd, format="xport") + tm.assert_frame_equal(data, data_csv) def test_multiple_types(self): From 92efdf2c3d38f64063a4e01a60c2d4497234c840 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Aug 2020 11:07:42 -0700 Subject: [PATCH 6/6] min_version compat --- pandas/util/_test_decorators.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index c56d0ad3e9472..0dad8c7397e37 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -259,7 +259,11 @@ def file_leak_context(): yield flist2 = proc.open_files() - assert flist2 == flist, (flist2, flist) + # on some builds open_files includes file position, which we _dont_ + # expect to remain unchanged, so we need to compare excluding that + flist_ex = [(x.path, x.fd) for x in flist] + flist2_ex = [(x.path, x.fd) for x in flist2] + assert flist2_ex == flist_ex, (flist2, flist) conns2 = proc.connections() assert conns2 == conns, (conns2, conns)