WIP: tidy date-parsing warning tests (parametrize expected warning, update messages)

MarcoGorelli · MarcoGorelli · commit 73a90f09c624 · 2022-10-10T12:31:10.000+01:00
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -1269,15 +1269,15 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
 @pytest.mark.parametrize("value", ["0"])
 def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
     # if we have an invalid date make sure that we handle this with
-    # and w/o the cache properly. TODO: parse dates directly in pyarrow, see
-    # https://github.com/pandas-dev/pandas/issues/48017
-
+    # and without the cache properly.
     parser = all_parsers
     s = StringIO((f"{value},\n") * 50000)
 
     if parser.engine == "pyarrow":
         # pyarrow reads "0" as 0 (of type int64), and so
         # pandas doesn't try to guess the datetime format
+        # TODO: parse dates directly in pyarrow, see
+        # https://github.com/pandas-dev/pandas/issues/48017
         warn = None
     else:
         warn = UserWarning
@@ -1729,7 +1729,7 @@ def test_parse_delimited_date_swap_with_warning(
     expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
     warning_msg = (
         "Parsing dates in .* format when dayfirst=.* was specified. "
-        "Pass `dayfirst=.*` or `format='.*'` to silence this warning."
+        "Pass `dayfirst=.*` or explicitly specify a format to silence this warning."
     )
     result = parser.read_csv_check_warnings(
         UserWarning,
@@ -1988,7 +1988,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
     )
     warning_msg = (
         "Parsing dates in .* format when dayfirst=.* was specified. "
-        "Pass `dayfirst=.*` or `format='.*'` to silence this warning."
+        "Pass `dayfirst=.*` or explicitly specify a format to silence this warning."
     )
     with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res = read_csv(
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -227,19 +227,14 @@ def test_to_datetime_with_NA(self, data, format, expected):
         result = to_datetime(data, format=format)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "data, format, expected",
-        [
-            (["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
-        ],
-    )
-    def test_to_datetime_with_NA_with_warning(self, data, format, expected):
+    def test_to_datetime_with_NA_with_warning(self):
         # GH#42957
         with tm.assert_produces_warning(
             UserWarning,
             match="Could not infer format",
         ):
-            result = to_datetime(data, format=format)
+            result = to_datetime(["201010", pd.NA])
+        expected = DatetimeIndex(["2010-10-20", "NaT"])
         tm.assert_index_equal(result, expected)
 
     def test_to_datetime_format_integer(self, cache):
@@ -356,7 +351,6 @@ def test_to_datetime_with_non_exact(self, cache):
         ],
     )
     def test_parse_nanoseconds_with_formula_no_warning(self, cache, arg):
-
         # GH8989
         # truncating the nanoseconds when a format was provided
         expected = to_datetime(arg, cache=cache)
@@ -372,7 +366,6 @@ def test_parse_nanoseconds_with_formula_no_warning(self, cache, arg):
         ],
     )
     def test_parse_nanoseconds_with_formula_with_warning(self, cache, arg):
-
         # GH8989
         # truncating the nanoseconds when a format was provided
         with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
@@ -972,15 +965,13 @@ def test_datetime_invalid_datatype(self, arg):
 
     @pytest.mark.parametrize("value", ["a", "00:01:99"])
     @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_scalar(self, value, format, warning, infer):
         # GH24763
-        if format is None:
-            warn = UserWarning
-        else:
-            warn = None
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -989,7 +980,7 @@ def test_datetime_invalid_scalar(self, value, format, infer):
         assert res == value
 
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -1003,22 +994,20 @@ def test_datetime_invalid_scalar(self, value, format, infer):
             f"Given date string {value} not likely a datetime"
         )
         with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(warn, match="Could not infer format"):
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
                 to_datetime(
                     value, errors="raise", format=format, infer_datetime_format=infer
                 )
 
     @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
     @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_outofbounds_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_outofbounds_scalar(self, value, format, warning, infer):
         # GH24763
-        if format is None:
-            warn = UserWarning
-        else:
-            warn = None
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -1027,7 +1016,7 @@ def test_datetime_outofbounds_scalar(self, value, format, infer):
         assert res == value
 
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -1042,31 +1031,26 @@ def test_datetime_outofbounds_scalar(self, value, format, infer):
                     value, errors="raise", format=format, infer_datetime_format=infer
                 )
         else:
-            # TODO changes slightly, leave as GH comment
-            msg = r"^Out of bounds nanosecond timestamp: .*"
-            with pytest.raises(OutOfBoundsDatetime, match=msg):
-                with tm.assert_produces_warning(
-                    warn,
-                    match="Could not infer format",
-                ):
-                    to_datetime(
-                        value,
-                        errors="raise",
-                        format=format,
-                        infer_datetime_format=infer,
-                    )
+            msg = "Out of bounds .* present at position 0"
+            with pytest.raises(
+                OutOfBoundsDatetime, match=msg
+            ), tm.assert_produces_warning(
+                warning,
+                match="Could not infer format",
+            ):
+                to_datetime(
+                    value, errors="raise", format=format, infer_datetime_format=infer
+                )
 
     @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]])
     @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_index(self, values, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_index(self, values, format, warning, infer):
         # GH24763
-        if format is None:
-            warn = UserWarning
-        else:
-            warn = None
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -1075,7 +1059,7 @@ def test_datetime_invalid_index(self, values, format, infer):
         tm.assert_index_equal(res, Index(values))
 
         with tm.assert_produces_warning(
-            warn,
+            warning,
             match="Could not infer format",
         ):
             res = to_datetime(
@@ -1090,7 +1074,7 @@ def test_datetime_invalid_index(self, values, format, infer):
         )
         with pytest.raises(ValueError, match=msg):
             with tm.assert_produces_warning(
-                warn,
+                warning,
                 match="Could not infer format",
             ):
                 to_datetime(
@@ -1199,7 +1183,9 @@ def test_to_datetime_converts_null_like_to_nat(self, cache, input, expected):
             (Series([""] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
         ),
     )
-    def test_to_datetime_converts_null_like_to_nat_warns(self, cache, input, expected):
+    def test_to_datetime_converts_null_like_to_nat_with_warning(
+        self, cache, input, expected
+    ):
         # GH35888
         with tm.assert_produces_warning(
             UserWarning,
@@ -1324,6 +1310,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive(self):
         # GH 24992
+        # Can't be parsed consistently as a whole, so parse each element in a loop.
         result = DatetimeIndex(
             [
                 to_datetime(string, utc=True)
@@ -1350,6 +1337,7 @@ def test_iso8601_strings_mixed_offsets_with_naive(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
         items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
+        # Can't be parsed consistently as a whole, so parse each element in a loop.
         result = [to_datetime(item, utc=True) for item in items]
         expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
         assert result == expected
@@ -1514,34 +1502,20 @@ def test_unit_with_numeric(self, cache, errors, dtype):
         result = to_datetime(arr, errors=errors, cache=cache)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "exp, arr",
-        [
-            [
-                ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
-                [1.434692e18, 1.432766e18, "foo", "NaT"],
-            ],
-        ],
-    )
-    def test_unit_with_numeric_coerce(self, cache, exp, arr):
+    def test_unit_with_numeric_coerce(self, cache):
         # but we want to make sure that we are coercing
         # if we have ints/strings
+        exp = ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"]
+        arr = [1.434692e18, 1.432766e18, "foo", "NaT"]
         expected = DatetimeIndex(exp)
         result = to_datetime(arr, errors="coerce", cache=cache)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "exp, arr",
-        [
-            [
-                ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
-                ["foo", 1.434692e18, 1.432766e18],
-            ],
-        ],
-    )
-    def test_unit_with_numeric_coerce_warns(self, cache, exp, arr):
+    def test_unit_with_numeric_coerce_with_warning(self, cache):
         # but we want to make sure that we are coercing
         # if we have ints/strings
+        exp = ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"]
+        arr = ["foo", 1.434692e18, 1.432766e18]
         expected = DatetimeIndex(exp)
         with tm.assert_produces_warning(
             UserWarning,
@@ -2300,13 +2274,17 @@ def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache):
         )
 
     @pytest.mark.parametrize(
-        "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)]
+        "tz_name, offset, warning",
+        [("UTC", 0, None), ("UTC-3", 180, UserWarning), ("UTC+3", -180, UserWarning)],
     )
-    @pytest.mark.xfail(reason="todo", strict=False)
-    def test_infer_datetime_format_tz_name(self, tz_name, offset):
+    def test_infer_datetime_format_tz_name(self, tz_name, offset, warning):
         # GH 33133
         ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
-        result = to_datetime(ser, infer_datetime_format=True)
+        with tm.assert_produces_warning(
+            warning,
+            match="Could not infer format",
+        ):
+            result = to_datetime(ser, infer_datetime_format=True)
         expected = Series(
             [Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))]
         )
@@ -2347,25 +2325,17 @@ class TestDaysInMonth:
     # tests for issue #10154
 
     @pytest.mark.parametrize(
-        "arg, format",
-        [
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-02-32", "%Y-%m-%d"],
-            ["2015-04-31", "%Y-%m-%d"],
-        ],
-    )
-    def test_day_not_in_month_coerce(self, cache, arg, format):
-        assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
-
-    @pytest.mark.parametrize(
-        "arg, format",
+        "arg, format, warning",
         [
-            ["2015-02-29", None],
+            ["2015-02-29", None, UserWarning],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-02-32", "%Y-%m-%d", None],
+            ["2015-04-31", "%Y-%m-%d", None],
         ],
     )
-    def test_day_not_in_month_coerce_with_warning(self, cache, arg, format):
+    def test_day_not_in_month_coerce(self, cache, arg, format, warning):
         with tm.assert_produces_warning(
-            UserWarning,
+            warning,
             match="Could not infer format",
         ):
             assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))