Open
Description
Feature Type
-
Adding new functionality to pandas
-
Changing existing functionality in pandas
-
Removing existing functionality in pandas
Problem Description
Path: pandas/core/tools/times.py
Func: to_time
Although to_time already handles several common time formats, I would like to make it more versatile and adaptable by letting users supply their own time formats. More informative (if simple) error messages would also be useful.
I'd be glad if you agree with these changes, and I'd love to open a PR!
Feature Description
I've modified times.py to implement the enhancements above, and I've written a small test demo in times_test.py to show the new features.
- Improved compatibility: Allows users to customize the time format, which improves the versatility and adaptability of the function.
- Enhanced error handling: Provides more detailed error information to make debugging and locating problems easier.
Alternative Solutions
The source code is as follows:
- new times.py
# modified times.py
from __future__ import annotations
from datetime import datetime, time
from typing import TYPE_CHECKING, List, Union
import numpy as np
from pandas._libs.lib import is_list_like
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
from pandas.core.dtypes.missing import notna
if TYPE_CHECKING:
from pandas._typing import DateTimeErrorChoices
def to_time(
    arg,
    format: Union[str, List[str], None] = None,
    infer_time_format: bool = False,
    errors: DateTimeErrorChoices = "raise",
    custom_formats: Union[List[str], None] = None,
):
    """
    Parse time strings into ``datetime.time`` objects.

    When no ``format`` is given (or inferred), each element is first tried
    with ``time.fromisoformat`` and then with the fixed strptime formats
    ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S",
    "%I:%M:%S%p", "%I%M%S%p") followed by any ``custom_formats``.

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str or list/tuple of str, default None
        Format(s) used to convert arg into a time object. When several
        formats are given they are tried in order for every element and the
        first one that matches wins. If None, the fixed formats are used.
    infer_time_format: bool, default False
        Infer the time format based on the first non-NaN element. If all
        strings are in the same format, this will speed up conversion.
        Only consulted when ``format`` is None.
    errors : {'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
    custom_formats : list/tuple of str or a single str, default None
        Additional formats tried after the fixed ones. They are only used
        when no ``format`` is given and none was inferred.

    Returns
    -------
    datetime.time, list of datetime.time, Series or None
        ``None`` and ``datetime.time`` inputs are returned unchanged, a
        Series yields a new Series with the same index and name, any other
        list-like yields a list, and a scalar yields a single value.

    Raises
    ------
    ValueError
        If ``errors`` is not 'raise' or 'coerce', or if an element cannot be
        parsed and ``errors`` is 'raise'.
    TypeError
        If an array-like ``arg`` has more than one dimension.
    """
    if errors not in ("raise", "coerce"):
        raise ValueError("errors must be one of 'raise', or 'coerce'.")

    def _first_match(element, formats):
        # Parse ``element`` with the first matching format; None if none match.
        for fmt in formats:
            try:
                return datetime.strptime(element, fmt).time()
            except (ValueError, TypeError):
                continue
        return None

    def _convert_listlike(arg, format):
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")
        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )

        arg = np.asarray(arg, dtype="O")

        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        times = []

        if format is None:
            # No explicit/inferred format: ISO parsing first, then the fixed
            # formats, then the caller's extras.  A bare string is treated as
            # one format rather than being split into characters.
            if isinstance(custom_formats, str):
                extra_formats = [custom_formats]
            else:
                extra_formats = list(custom_formats or ())
            formats = [*_time_formats, *extra_formats]

            for element in arg:
                try:
                    parsed = time.fromisoformat(element)
                except (ValueError, TypeError):
                    parsed = _first_match(element, formats)
                if parsed is None and errors == "raise":
                    raise ValueError(f"Cannot convert arg {arg} to a time")
                times.append(parsed)

        elif isinstance(format, (list, tuple)):
            # Several candidate formats: first match wins for each element.
            for element in arg:
                parsed = _first_match(element, format)
                if parsed is None and errors == "raise":
                    msg = (
                        f"Cannot convert {element} to a time with given "
                        f"formats {format}"
                    )
                    raise ValueError(msg)
                times.append(parsed)

        else:
            # Exactly one format: keep the strptime failure as the cause.
            for element in arg:
                try:
                    times.append(datetime.strptime(element, format).time())
                except (ValueError, TypeError) as err:
                    if errors == "raise":
                        msg = (
                            f"Cannot convert {element} to a time with given "
                            f"format {format}"
                        )
                        raise ValueError(msg) from err
                    times.append(None)

        return times

    if arg is None or isinstance(arg, time):
        return arg
    if isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)
    if isinstance(arg, ABCIndex) or is_list_like(arg):
        return _convert_listlike(arg, format)

    # Scalar input: run it through the list-like path and unwrap the result.
    return _convert_listlike(np.array([arg]), format)[0]
# Fixed strptime formats for time parsing, used when the caller supplies no
# explicit format.
# Order matters: the code that walks this list stops at the first format that
# parses successfully, so earlier entries take precedence over later ones.
_time_formats = [
"%H:%M",
"%H%M",
"%I:%M%p",
"%I%M%p",
"%H:%M:%S",
"%H%M%S",
"%I:%M:%S%p",
"%I%M%S%p",
]
def _guess_time_format_for_array(arr):
    """
    Guess the strptime format of ``arr`` from its first non-NaN element.

    Parameters
    ----------
    arr : array-like
        Values to inspect; only the first element for which ``notna`` is
        true is examined.

    Returns
    -------
    str or None
        The first entry of ``_time_formats`` that parses the element, or
        None when there is no non-NaN element, no fixed format matches, or
        the element is not something ``strptime`` can parse at all.
    """
    non_nan_elements = notna(arr).nonzero()[0]
    if not len(non_nan_elements):
        return None

    element = arr[non_nan_elements[0]]
    for time_format in _time_formats:
        try:
            datetime.strptime(element, time_format)
        except ValueError:
            # Wrong format for this string; try the next candidate.
            continue
        except TypeError:
            # Not a string (e.g. an int or a datetime.time): no format can
            # match, so report "unknown" instead of letting the error escape.
            return None
        return time_format

    return None
- test demo (placed in the same directory as times.py)
# times_test.py
from datetime import time
import pandas as pd
import numpy as np
from times import to_time
# Test code
def test_to_time():
    """
    Demo driver for `to_time`.

    - Calls `to_time` on a range of input shapes (scalar string, time object,
      list, NumPy array, Series) and option combinations, printing each result.
    - Shows both error modes for an unparsable element: the raised
      ValueError is caught and printed, and the coerced result is printed.
    - return: None
    """

    def show(label, value, **options):
        # Print one labelled conversion; any exception propagates to the caller.
        print(label, to_time(value, **options))

    # Scalar inputs
    show("Single time string:", "12:30")
    show("Time object:", time(12, 30))

    # List-like inputs
    show("List of time strings:", ["12:30", "13:45"])
    show("NumPy array of time strings:", np.array(["12:30", "13:45"]))
    show("Pandas Series of time strings:", pd.Series(["12:30", "13:45"]))

    # Invalid element with the default errors="raise"
    try:
        show("Invalid list:", ["12:30", "invalid"])
    except ValueError as e:
        print("Error:", e)

    # Invalid element with errors="coerce"
    show("Invalid list (coerce):", ["12:30", "invalid"], errors="coerce")

    # User-supplied extra format
    show(
        "Custom time list:",
        ["12:30:45.123456", "13:45:00.987654"],
        custom_formats=["%H:%M:%S.%f"],
    )

    # Format inferred from the first element
    show("Inferred time list:", ["12:30", "13:45"], infer_time_format=True)

    # ISO 8601 strings
    show("ISO 8601 time list:", ["12:30:45", "13:45:00"])
# Run the demo only when this file is executed as a script, so that importing
# the module (for example during test collection) does not trigger the prints.
if __name__ == "__main__":
    test_to_time()
- result for times_test.py
Single time string: 12:30:00
Time object: 12:30:00
List of time strings: [datetime.time(12, 30), datetime.time(13, 45)]
NumPy array of time strings: [datetime.time(12, 30), datetime.time(13, 45)]
Pandas Series of time strings: 0 12:30:00
1 13:45:00
dtype: object
Error: Cannot convert arg ['12:30' 'invalid'] to a time
Invalid list (coerce): [datetime.time(12, 30), None]
Custom time list: [datetime.time(12, 30, 45, 123456), datetime.time(13, 45, 0, 987654)]
Inferred time list: [datetime.time(12, 30), datetime.time(13, 45)]
ISO 8601 time list: [datetime.time(12, 30, 45), datetime.time(13, 45)]
Additional Context
No response