Skip to content

ENH: Add support to customize time format and more detailed error handling information for to_time #59931

Open
@Lzforevr

Description

@Lzforevr

Feature Type

  • Adding new functionality to pandas

  • Changing existing functionality in pandas

  • Removing existing functionality in pandas

Problem Description

Path: pandas/core/tools/times.py
Func: to_time

Although to_time already handles several common time formats, I would like to make it more versatile and adaptable by letting users supply custom time formats. More informative error messages would also be useful, even though that part of the change is small.

I would be glad if you agree with these changes, and I'd love to open a PR!

Feature Description

I've modified times.py to implement the enhancements above, and I've written a small demo in times_test.py that shows the new features.

  1. Improved compatibility: Allows users to customize the time format, which improves the versatility and adaptability of the function.
  2. Enhanced error handling: Provides more detailed error information to make debugging and locating problems easier.

Alternative Solutions

The source code is as follows:

  1. new times.py
# modified times.py
from __future__ import annotations
from datetime import datetime, time
from typing import TYPE_CHECKING, List, Union
import numpy as np
from pandas._libs.lib import is_list_like
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
from pandas.core.dtypes.missing import notna

if TYPE_CHECKING:
    from pandas._typing import DateTimeErrorChoices


def _strptime_first_match(element, formats):
    """
    Parse ``element`` with the first strptime format in ``formats`` that fits.

    Parameters
    ----------
    element : object
        Value to parse; anything that is not a matching string simply fails
        every format.
    formats : iterable of str
        Candidate strptime formats, tried in order.

    Returns
    -------
    tuple
        ``(parsed_time, None)`` on success, otherwise ``(None, last_error)``
        where ``last_error`` is the exception raised by the final attempt
        (``None`` when ``formats`` is empty).
    """
    last_error = None
    for fmt in formats:
        try:
            return datetime.strptime(element, fmt).time(), None
        except (ValueError, TypeError) as err:
            last_error = err
    return None, last_error


def to_time(
    arg,
    format: Union[str, List[str], None] = None,
    infer_time_format: bool = False,
    errors: DateTimeErrorChoices = "raise",
    custom_formats: Union[List[str], None] = None,
):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p") and additional custom formats.

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str or list/tuple of str, default None
        Format(s) used to convert arg into a time object. When several
        formats are given they are tried in order for every element. If None,
        ISO parsing is attempted first, then the fixed formats, then
        ``custom_formats``.
    infer_time_format : bool, default False
        Infer the time format based on the first non-NaN element. If all
        strings are in the same format, this will speed up conversion.
        Only used when ``format`` is None.
    errors : {'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
    custom_formats : str or list/tuple of str, default None
        Additional strptime formats tried after the fixed ones. Ignored when
        an explicit (or inferred) ``format`` is in effect.

    Returns
    -------
    datetime.time or list of datetime.time
        A scalar input yields a single value, list-likes yield a list, and a
        Series yields a new object built with the Series' own constructor.
        ``None`` and ``datetime.time`` inputs are returned unchanged.

    Raises
    ------
    ValueError
        If ``errors`` is not 'raise' or 'coerce', or if an element cannot be
        parsed while ``errors='raise'``.
    TypeError
        If a non-list, non-tuple ``arg`` has more than one dimension.
    """
    if errors not in ("raise", "coerce"):
        raise ValueError("errors must be one of 'raise', or 'coerce'.")

    # Accept a bare string or any sequence so that concatenating with the
    # fixed formats cannot fail on e.g. a tuple.
    if custom_formats is None:
        extra_formats = []
    elif isinstance(custom_formats, str):
        extra_formats = [custom_formats]
    else:
        extra_formats = list(custom_formats)

    def _convert_listlike(arg, format):
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")

        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )
        arg = np.asarray(arg, dtype="O")

        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        times = []
        if format is not None:
            # A list/tuple means "try each of these"; a str is a single format.
            several = isinstance(format, (list, tuple))
            candidates = list(format) if several else [format]
            noun = "formats" if several else "format"
            for element in arg:
                parsed, last_error = _strptime_first_match(element, candidates)
                if parsed is None and errors == "raise":
                    msg = (
                        f"Cannot convert {element} to a time with given "
                        f"{noun} {format}"
                    )
                    raise ValueError(msg) from last_error
                # Under errors="coerce" a failed parse is recorded as None.
                times.append(parsed)
            return times

        fallback_formats = [*_time_formats, *extra_formats]
        for element in arg:
            try:
                # ISO 8601 parsing is attempted before any strptime format.
                time_object = time.fromisoformat(element)
            except (ValueError, TypeError):
                time_object, _ = _strptime_first_match(element, fallback_formats)
            if time_object is not None:
                times.append(time_object)
            elif errors == "raise":
                raise ValueError(f"Cannot convert arg {arg} to a time")
            else:
                times.append(None)
        return times

    if arg is None:
        return arg
    elif isinstance(arg, time):
        return arg
    elif isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif isinstance(arg, ABCIndex):
        return _convert_listlike(arg, format)
    elif is_list_like(arg):
        return _convert_listlike(arg, format)
    # Scalar: wrap in a 1-element array and unwrap the single result.
    return _convert_listlike(np.array([arg]), format)[0]


# Fixed strptime formats for time parsing. They are tried in the order listed
# and the first one that parses an element wins, so the order is significant.
# This must remain a list: to_time concatenates it with the custom formats.
_time_formats = [
    "%H:%M",  # 24-hour clock with colon
    "%H%M",  # 24-hour clock, no separator
    "%I:%M%p",  # 12-hour clock with colon and AM/PM marker
    "%I%M%p",  # 12-hour clock, no separator, AM/PM marker
    "%H:%M:%S",  # 24-hour clock with seconds
    "%H%M%S",  # 24-hour clock with seconds, no separator
    "%I:%M:%S%p",  # 12-hour clock with seconds and AM/PM marker
    "%I%M%S%p",  # 12-hour clock with seconds, no separator, AM/PM marker
]


def _guess_time_format_for_array(arr):
    """
    Guess a strptime format from the first non-NaN element of ``arr``.

    Parameters
    ----------
    arr : array
        Values to inspect; only the first element for which ``notna`` is
        true is examined.

    Returns
    -------
    str or None
        The first entry of ``_time_formats`` that parses that element, or
        None when every element is NaN, the element is not a string, or no
        fixed format matches.
    """
    non_nan_elements = notna(arr).nonzero()[0]
    if not len(non_nan_elements):
        return None

    element = arr[non_nan_elements[0]]
    for time_format in _time_formats:
        try:
            datetime.strptime(element, time_format)
        except TypeError:
            # strptime only accepts str; a non-string element (for example an
            # already-parsed datetime.time) can never match any format, so
            # report "no guess" instead of letting the TypeError escape.
            return None
        except ValueError:
            continue
        return time_format
    return None
  2. test demo (in the same directory as times.py)
# times_test.py
from datetime import time

import pandas as pd
import numpy as np
from times import to_time


# Test code
def test_to_time():
    """
    Demo driver for ``to_time``.

    Prints the result of calling ``to_time`` on several kinds of input and
    shows how an invalid entry is reported (default) or coerced.

    Returns None; all output goes to stdout.
    """
    two_times = ["12:30", "13:45"]
    with_bad_entry = ["12:30", "invalid"]

    # Inputs passed to to_time with default arguments, one per container type.
    plain_cases = (
        ("Single time string:", "12:30"),
        ("Time object:", time(12, 30)),
        ("List of time strings:", list(two_times)),
        ("NumPy array of time strings:", np.array(two_times)),
        ("Pandas Series of time strings:", pd.Series(two_times)),
    )
    for label, value in plain_cases:
        print(label, to_time(value))

    # Default error mode: a ValueError is caught and its message printed.
    try:
        print("Invalid list:", to_time(list(with_bad_entry)))
    except ValueError as exc:
        print("Error:", exc)

    # Same input again, this time with errors="coerce".
    coerced = to_time(list(with_bad_entry), errors="coerce")
    print("Invalid list (coerce):", coerced)

    # Fractional seconds are supplied through custom_formats.
    fractional = ["12:30:45.123456", "13:45:00.987654"]
    print("Custom time list:", to_time(fractional, custom_formats=["%H:%M:%S.%f"]))

    # Format inferred from the first element.
    print("Inferred time list:", to_time(list(two_times), infer_time_format=True))

    # ISO 8601 strings with seconds.
    print("ISO 8601 time list:", to_time(["12:30:45", "13:45:00"]))


test_to_time()
  3. output of times_test.py
Single time string: 12:30:00
Time object: 12:30:00
List of time strings: [datetime.time(12, 30), datetime.time(13, 45)]
NumPy array of time strings: [datetime.time(12, 30), datetime.time(13, 45)]
Pandas Series of time strings: 0    12:30:00
1    13:45:00
dtype: object
Error: Cannot convert arg ['12:30' 'invalid'] to a time
Invalid list (coerce): [datetime.time(12, 30), None]
Custom time list: [datetime.time(12, 30, 45, 123456), datetime.time(13, 45, 0, 987654)]
Inferred time list: [datetime.time(12, 30), datetime.time(13, 45)]
ISO 8601 time list: [datetime.time(12, 30, 45), datetime.time(13, 45)]

Additional Context

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    Enhancement · Needs Info (Clarification about behavior needed to assess issue) · datetime.date (stdlib datetime.date support)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions