From 0f30fc5c220dec14d64cf86e2f75e36a20dc77fe Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 7 Jul 2019 17:08:47 +0300 Subject: [PATCH 1/4] Break added --- pandas/_libs/lib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1df220029def6..1936404b75602 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1955,6 +1955,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 + break elif util.is_bool_object(val): seen.bool_ = 1 bools[i] = val From 56002cddb8f1b59b78d6f3c6d4e7b05989295f98 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 7 Jul 2019 17:08:47 +0300 Subject: [PATCH 2/4] Asv bench and what's new entry added. --- asv_bench/benchmarks/algorithms.py | 14 ++++++++++++++ doc/source/whatsnew/v0.25.0.rst | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 436093ef195ef..1dc2f79461544 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -4,6 +4,7 @@ import pandas as pd from pandas.util import testing as tm +from pandas._libs import lib for imp in ["pandas.util", "pandas.tools.hashing"]: try: @@ -13,6 +14,19 @@ pass +class MaybeConvertObjects: + def setup(self): + N = 10 ** 5 + + data = list(range(N)) + data[0] = pd.NaT + data = np.array(data) + self.data = data + + def time_maybe_convert_objects(self): + lib.maybe_convert_objects(self.data) + + class Factorize: params = [[True, False], ["int", "uint", "float", "string"]] diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 68ecb4c487a1e..2035e7e1c6c26 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -939,7 +939,7 @@ Performance improvements - Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`) - For :meth:`to_datetime` changed default value of cache parameter to ``True`` (:issue:`26043`) - Improved performance of :class:`DatetimeIndex` and :class:`PeriodIndex` slicing given non-unique, monotonic data (:issue:`27136`). - +- Improved performance of :meth:`pandas._libs.lib.maybe_convert_objects` for the case when input contains ``NaT``. .. _whatsnew_0250.bug_fixes: From 790bf8cbe82e4d11336cc38849f1da960d57d3bd Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 7 Jul 2019 17:08:47 +0300 Subject: [PATCH 3/4] Imports order changed. What's new entry changed. --- asv_bench/benchmarks/algorithms.py | 2 +- doc/source/whatsnew/v0.25.0.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 1dc2f79461544..ae1453de41f3a 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -3,8 +3,8 @@ import numpy as np import pandas as pd -from pandas.util import testing as tm from pandas._libs import lib +from pandas.util import testing as tm for imp in ["pandas.util", "pandas.tools.hashing"]: try: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2035e7e1c6c26..9de9ca14244f8 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -939,7 +939,7 @@ Performance improvements - Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`) - For :meth:`to_datetime` changed default value of cache parameter to ``True`` (:issue:`26043`) - Improved performance of :class:`DatetimeIndex` and :class:`PeriodIndex` slicing given non-unique, monotonic data (:issue:`27136`). -- Improved performance of :meth:`pandas._libs.lib.maybe_convert_objects` for the case when input contains ``NaT``. +- Improved performance of converting ``np.ndarray`` with object dtype for the case when input contains ``NaT`` (:issue:`27299`). .. _whatsnew_0250.bug_fixes: From e8173c617320c8eb8234e0602a85686397ef7637 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 7 Jul 2019 17:08:47 +0300 Subject: [PATCH 4/4] Fix imports order, remove what's new entry. --- asv_bench/benchmarks/algorithms.py | 3 ++- doc/source/whatsnew/v0.25.0.rst | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index ae1453de41f3a..7d97f2c740acb 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -2,8 +2,9 @@ import numpy as np -import pandas as pd from pandas._libs import lib + +import pandas as pd from pandas.util import testing as tm for imp in ["pandas.util", "pandas.tools.hashing"]: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 9de9ca14244f8..8c472cb3121d2 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -939,7 +939,6 @@ Performance improvements - Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`) - For :meth:`to_datetime` changed default value of cache parameter to ``True`` (:issue:`26043`) - Improved performance of :class:`DatetimeIndex` and :class:`PeriodIndex` slicing given non-unique, monotonic data (:issue:`27136`). -- Improved performance of converting ``np.ndarray`` with object dtype for the case when input contains ``NaT`` (:issue:`27299`). .. _whatsnew_0250.bug_fixes: