From de329557382a97e3ae2a68d6df727bc5e91ac332 Mon Sep 17 00:00:00 2001
From: Liang Yan
Date: Mon, 27 Mar 2023 10:47:36 +0800
Subject: [PATCH] read_fwf with urlopen test GH#26376

Signed-off-by: Liang Yan
---
Notes (this section sits after the "---" separator, so it is not part of
the commit message):

* can_connect: the non-200 status check now only rejects a response whose
  Content-type header is exactly "text/html".
  NOTE(review): presumably this is what lets the ftp:// URL used by the new
  test pass the reachability check -- confirm. The comparison is an exact
  string match, so a header value carrying parameters (for example
  "text/html; charset=utf-8") would not equal it.
* test_url_urlopen: opens the FTP station list with urlopen, passes the open
  handle to read_fwf with explicit widths and names, and compares only the
  resulting column index against the expected names.

 pandas/_testing/_io.py                  |  5 ++-
 pandas/tests/io/parser/test_read_fwf.py | 48 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index 37a75d9f59920..d79968a580e40 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -271,7 +271,10 @@ def can_connect(url, error_classes=None) -> bool:
     try:
         with urlopen(url, timeout=20) as response:
             # Timeout just in case rate-limiting is applied
-            if response.status != 200:
+            if (
+                response.info().get("Content-type") == "text/html"
+                and response.status != 200
+            ):
                 return False
     except error_classes:
         return False
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index d166946704e13..2a05a3aa3297e 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -28,6 +28,7 @@
 )
 from pandas.tests.io.test_compression import _compression_to_extension
 
+from pandas.io.common import urlopen
 from pandas.io.parsers import (
     read_csv,
     read_fwf,
@@ -1010,3 +1011,50 @@ def test_invalid_dtype_backend():
     )
     with pytest.raises(ValueError, match=msg):
         read_fwf("test", dtype_backend="numpy")
+
+
+@pytest.mark.network
+@tm.network(
+    url="ftp://ftp.ncdc.noaa.gov/pub/data/igra/igra2-station-list.txt",
+    check_before_test=True,
+)
+def test_url_urlopen():
+    expected = pd.Index(
+        [
+            "CC",
+            "Network",
+            "Code",
+            "StationId",
+            "Latitude",
+            "Longitude",
+            "Elev",
+            "dummy",
+            "StationName",
+            "From",
+            "To",
+            "Nrec",
+        ],
+        dtype="object",
+    )
+    url = "ftp://ftp.ncdc.noaa.gov/pub/data/igra/igra2-station-list.txt"
+    with urlopen(url) as f:
+        result = read_fwf(
+            f,
+            widths=(2, 1, 3, 5, 9, 10, 7, 4, 30, 5, 5, 7),
+            names=(
+                "CC",
+                "Network",
+                "Code",
+                "StationId",
+                "Latitude",
+                "Longitude",
+                "Elev",
+                "dummy",
+                "StationName",
+                "From",
+                "To",
+                "Nrec",
+            ),
+        ).columns
+
+    tm.assert_index_equal(result, expected)