From 7226c9d48586b2d91c71249b727d5889e820d75b Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Tue, 27 Sep 2022 21:46:16 -0400 Subject: [PATCH 1/6] Turn isna() and notna() into TypeGuards --- pandas-stubs/core/dtypes/missing.pyi | 17 +++++------- tests/test_pandas.py | 39 +++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/pandas-stubs/core/dtypes/missing.pyi b/pandas-stubs/core/dtypes/missing.pyi index 747f260bd..84bc8ff35 100644 --- a/pandas-stubs/core/dtypes/missing.pyi +++ b/pandas-stubs/core/dtypes/missing.pyi @@ -1,7 +1,4 @@ -from typing import ( - Literal, - overload, -) +from typing import overload import numpy as np from numpy import typing as npt @@ -10,12 +7,14 @@ from pandas import ( Index, Series, ) +from typing_extensions import TypeGuard from pandas._libs.missing import NAType from pandas._libs.tslibs import NaTType from pandas._typing import ( ArrayLike, Scalar, + ScalarT, ) isposinf_scalar = ... @@ -28,9 +27,9 @@ def isna(obj: Series) -> Series[bool]: ... @overload def isna(obj: Index | list | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload -def isna(obj: Scalar) -> bool: ... -@overload -def isna(obj: NaTType | NAType | None) -> Literal[True]: ... +def isna( + obj: Scalar | NaTType | NAType | None, +) -> TypeGuard[NaTType | NAType | None]: ... isnull = isna @@ -41,8 +40,6 @@ def notna(obj: Series) -> Series[bool]: ... @overload def notna(obj: Index | list | ArrayLike) -> npt.NDArray[np.bool_]: ... @overload -def notna(obj: Scalar) -> bool: ... -@overload -def notna(obj: NaTType | NAType | None) -> Literal[False]: ... +def notna(obj: ScalarT | NaTType | NAType | None) -> TypeGuard[ScalarT]: ... notnull = notna diff --git a/tests/test_pandas.py b/tests/test_pandas.py index d5f73991b..d4eb5d256 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -5,7 +5,6 @@ from typing import ( TYPE_CHECKING, Any, - Literal, Union, ) @@ -17,6 +16,8 @@ import pytest from typing_extensions import assert_type +from pandas._libs.missing import NAType +from pandas._libs.tslibs import NaTType from pandas._typing import Scalar from tests import ( @@ -246,18 +247,42 @@ def test_isna() -> None: idx2 = pd.Index([1, 2]) check(assert_type(pd.notna(idx2), npt.NDArray[np.bool_]), np.ndarray, np.bool_) - assert check(assert_type(pd.isna(pd.NA), Literal[True]), bool) - assert not check(assert_type(pd.notna(pd.NA), Literal[False]), bool) + assert check(assert_type(pd.isna(pd.NA), bool), bool) + assert not check(assert_type(pd.notna(pd.NA), bool), bool) - assert check(assert_type(pd.isna(pd.NaT), Literal[True]), bool) - assert not check(assert_type(pd.notna(pd.NaT), Literal[False]), bool) + assert check(assert_type(pd.isna(pd.NaT), bool), bool) + assert not check(assert_type(pd.notna(pd.NaT), bool), bool) - assert check(assert_type(pd.isna(None), Literal[True]), bool) - assert not check(assert_type(pd.notna(None), Literal[False]), bool) + assert check(assert_type(pd.isna(None), bool), bool) + assert not check(assert_type(pd.notna(None), bool), bool) check(assert_type(pd.isna(2.5), bool), bool) check(assert_type(pd.notna(2.5), bool), bool) + # Check type guard functionality + nullable1 = random.choice(["value", None, pd.NA, pd.NaT]) + if pd.notna(nullable1): + check(assert_type(nullable1, str), str) + if pd.isna(nullable1): + assert_type(nullable1, Union[NaTType, NAType, None]) + + nullable2 = random.choice([2, None]) + if pd.notna(nullable2): + check(assert_type(nullable2, int), int) + if pd.isna(nullable2): + # TODO: Due to limitations in TypeGuard spec, the true annotation is not viable at this time + # There is a proposal being floated for a StrictTypeGuard that will have more rigid narrowing semantics + # assert_type(nullable2, None) + assert_type(nullable2, Union[NaTType, NAType, None]) + + nullable3 = random.choice([2, None, pd.NA]) + if pd.notna(nullable3): + check(assert_type(nullable3, int), int) + if pd.isna(nullable3): + # TODO: See comment above about the limitations of TypeGuard + # assert_type(nullable3, Union[NAType, None]) + assert_type(nullable3, Union[NaTType, NAType, None]) + # GH 55 def test_read_xml() -> None: From 4a329f36777f4aa1ad497c983e50ab07decd7519 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Wed, 5 Oct 2022 22:21:58 -0400 Subject: [PATCH 2/6] Assert the true/false return of isna/notna on a scalar value --- tests/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index d4eb5d256..d3944644e 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -256,8 +256,8 @@ def test_isna() -> None: assert check(assert_type(pd.isna(None), bool), bool) assert not check(assert_type(pd.notna(None), bool), bool) - check(assert_type(pd.isna(2.5), bool), bool) - check(assert_type(pd.notna(2.5), bool), bool) + assert not check(assert_type(pd.isna(2.5), bool), bool) + assert check(assert_type(pd.notna(2.5), bool), bool) # Check type guard functionality nullable1 = random.choice(["value", None, pd.NA, pd.NaT]) From 534f51855853fc6caf76213dd9e59b88af7a2f2d Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Wed, 5 Oct 2022 23:01:37 -0400 Subject: [PATCH 3/6] Adjust TypeGuard tests to document limitations of notna/isna type narrowing --- tests/test_pandas.py | 45 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index d3944644e..09f4ef580 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -259,29 +259,56 @@ def test_isna() -> None: assert not check(assert_type(pd.isna(2.5), bool), bool) assert check(assert_type(pd.notna(2.5), bool), bool) - # Check type guard functionality + # Check TypeGuard type narrowing functionality + # TODO: Due to limitations in TypeGuard spec, the true annotations are not always viable + # and as a result the type narrowing does not always work as it intuitively should + # There is a proposal being floated for a StrictTypeGuard that will have more rigid narrowing semantics + # In the test cases below, a commented out assertion will be included to document the optimal test result nullable1 = random.choice(["value", None, pd.NA, pd.NaT]) if pd.notna(nullable1): check(assert_type(nullable1, str), str) + if not pd.isna(nullable1): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # check(assert_type(nullable1, str), str) + check(assert_type(nullable1, Union[str, NaTType, NAType, None]), str) if pd.isna(nullable1): assert_type(nullable1, Union[NaTType, NAType, None]) + if not pd.notna(nullable1): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # assert_type(nullable1, Union[NaTType, NAType, None]) + assert_type(nullable1, Union[str, NaTType, NAType, None]) nullable2 = random.choice([2, None]) if pd.notna(nullable2): check(assert_type(nullable2, int), int) + if not pd.isna(nullable2): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # check(assert_type(nullable2, int), int) + check(assert_type(nullable2, Union[int, None]), int) if pd.isna(nullable2): - # TODO: Due to limitations in TypeGuard spec, the true annotation is not viable at this time - # There is a proposal being floated for a StrictTypeGuard that will have more rigid narrowing semantics - # assert_type(nullable2, None) - assert_type(nullable2, Union[NaTType, NAType, None]) - - nullable3 = random.choice([2, None, pd.NA]) + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # check(assert_type(nullable2, None), type(None)) + check(assert_type(nullable2, Union[NaTType, NAType, None]), type(None)) + if not pd.notna(nullable2): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # check(assert_type(nullable2, None), type(None)) + assert_type(nullable2, Union[int, NaTType, NAType, None]) + + nullable3 = random.choice([True, None, pd.NA]) if pd.notna(nullable3): - check(assert_type(nullable3, int), int) + check(assert_type(nullable3, bool), bool) + if not pd.isna(nullable3): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # check(assert_type(nullable3, bool), bool) + check(assert_type(nullable3, Union[bool, NAType, None]), bool) if pd.isna(nullable3): - # TODO: See comment above about the limitations of TypeGuard + # TODO: This is the true type (see comments above on the limitations of TypeGuard) # assert_type(nullable3, Union[NAType, None]) assert_type(nullable3, Union[NaTType, NAType, None]) + if not pd.notna(nullable3): + # TODO: This is the true type (see comments above on the limitations of TypeGuard) + # assert_type(nullable3, Union[NAType, None]) + assert_type(nullable3, Union[bool, NaTType, NAType, None]) # GH 55 From 7ffcf824ee3f310127419adc59378ab39cceaf91 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Wed, 5 Oct 2022 23:09:56 -0400 Subject: [PATCH 4/6] Document conflicting MyPy and pyright results --- tests/test_pandas.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 09f4ef580..5ad258b8a 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -292,7 +292,10 @@ def test_isna() -> None: if not pd.notna(nullable2): # TODO: This is the true type (see comments above on the limitations of TypeGuard) # check(assert_type(nullable2, None), type(None)) - assert_type(nullable2, Union[int, NaTType, NAType, None]) + assert_type(nullable2, Union[int, None]) # TODO: MyPy result + assert_type( + nullable2, Union[int, NaTType, NAType, None] + ) # TODO: Pyright result nullable3 = random.choice([True, None, pd.NA]) if pd.notna(nullable3): @@ -308,7 +311,10 @@ def test_isna() -> None: if not pd.notna(nullable3): # TODO: This is the true type (see comments above on the limitations of TypeGuard) # assert_type(nullable3, Union[NAType, None]) - assert_type(nullable3, Union[bool, NaTType, NAType, None]) + assert_type(nullable3, Union[bool, NAType, None]) # TODO: Mypy result + assert_type( + nullable3, Union[bool, NaTType, NAType, None] + ) # TODO: Pyright result # GH 55 From 6a2e12352c0a0b942bbc2285bd422e55a0ea7702 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Tue, 11 Oct 2022 21:13:08 -0400 Subject: [PATCH 5/6] Comment out the tests which produce conflicting results in Mypy and Pyright Keep the tests as comments in order to document the behavior for future reference --- tests/test_pandas.py | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 5ad258b8a..ba10e8494 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -268,53 +268,49 @@ def test_isna() -> None: if pd.notna(nullable1): check(assert_type(nullable1, str), str) if not pd.isna(nullable1): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # check(assert_type(nullable1, str), str) + # check(assert_type(nullable1, str), str) # TODO: Desired result (see comments above) check(assert_type(nullable1, Union[str, NaTType, NAType, None]), str) if pd.isna(nullable1): assert_type(nullable1, Union[NaTType, NAType, None]) if not pd.notna(nullable1): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # assert_type(nullable1, Union[NaTType, NAType, None]) + # assert_type(nullable1, Union[NaTType, NAType, None]) # TODO: Desired result (see comments above) assert_type(nullable1, Union[str, NaTType, NAType, None]) nullable2 = random.choice([2, None]) if pd.notna(nullable2): check(assert_type(nullable2, int), int) if not pd.isna(nullable2): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # check(assert_type(nullable2, int), int) + # check(assert_type(nullable2, int), int) # TODO: Desired result (see comments above) check(assert_type(nullable2, Union[int, None]), int) if pd.isna(nullable2): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # check(assert_type(nullable2, None), type(None)) + # check(assert_type(nullable2, None), type(None)) # TODO: Desired result (see comments above) check(assert_type(nullable2, Union[NaTType, NAType, None]), type(None)) if not pd.notna(nullable2): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # check(assert_type(nullable2, None), type(None)) - assert_type(nullable2, Union[int, None]) # TODO: MyPy result - assert_type( - nullable2, Union[int, NaTType, NAType, None] - ) # TODO: Pyright result + # check(assert_type(nullable2, None), type(None)) # TODO: Desired result (see comments above) + # TODO: MyPy and Pyright produce conflicting results: + # assert_type(nullable2, Union[int, None]) # MyPy result + # assert_type( + # nullable2, Union[int, NaTType, NAType, None] + # ) # Pyright result + pass nullable3 = random.choice([True, None, pd.NA]) if pd.notna(nullable3): check(assert_type(nullable3, bool), bool) if not pd.isna(nullable3): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # check(assert_type(nullable3, bool), bool) + # check(assert_type(nullable3, bool), bool) # TODO: Desired result (see comments above) check(assert_type(nullable3, Union[bool, NAType, None]), bool) if pd.isna(nullable3): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # assert_type(nullable3, Union[NAType, None]) + # assert_type(nullable3, Union[NAType, None]) # TODO: Desired result (see comments above) assert_type(nullable3, Union[NaTType, NAType, None]) if not pd.notna(nullable3): - # TODO: This is the true type (see comments above on the limitations of TypeGuard) - # assert_type(nullable3, Union[NAType, None]) - assert_type(nullable3, Union[bool, NAType, None]) # TODO: Mypy result - assert_type( - nullable3, Union[bool, NaTType, NAType, None] - ) # TODO: Pyright result + # assert_type(nullable3, Union[NAType, None]) # TODO: Desired result (see comments above) + # TODO: MyPy and Pyright produce conflicting results: + # assert_type(nullable3, Union[bool, NAType, None]) # Mypy result + # assert_type( + # nullable3, Union[bool, NaTType, NAType, None] + # ) # Pyright result + pass # GH 55 From 39e431149d9b676a8276578e2f2938e16ab86d99 Mon Sep 17 00:00:00 2001 From: Siddhartha Gandhi Date: Tue, 11 Oct 2022 21:21:16 -0400 Subject: [PATCH 6/6] Use explicit type annotations per feedback --- tests/test_pandas.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index ba10e8494..456f94947 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -264,7 +264,9 @@ def test_isna() -> None: # and as a result the type narrowing does not always work as it intuitively should # There is a proposal being floated for a StrictTypeGuard that will have more rigid narrowing semantics # In the test cases below, a commented out assertion will be included to document the optimal test result - nullable1 = random.choice(["value", None, pd.NA, pd.NaT]) + nullable1: str | None | NAType | NaTType = random.choice( + ["value", None, pd.NA, pd.NaT] + ) if pd.notna(nullable1): check(assert_type(nullable1, str), str) if not pd.isna(nullable1): @@ -276,7 +278,7 @@ def test_isna() -> None: # assert_type(nullable1, Union[NaTType, NAType, None]) # TODO: Desired result (see comments above) assert_type(nullable1, Union[str, NaTType, NAType, None]) - nullable2 = random.choice([2, None]) + nullable2: int | None = random.choice([2, None]) if pd.notna(nullable2): check(assert_type(nullable2, int), int) if not pd.isna(nullable2): @@ -294,7 +296,7 @@ def test_isna() -> None: # ) # Pyright result pass - nullable3 = random.choice([True, None, pd.NA]) + nullable3: bool | None | NAType = random.choice([True, None, pd.NA]) if pd.notna(nullable3): check(assert_type(nullable3, bool), bool) if not pd.isna(nullable3):