@@ -432,7 +432,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
432
432
d = parse_dates_safe (dates , year = True )
433
433
conv_dates = d .year
434
434
else :
435
- raise ValueError ("fmt %s not understood" % fmt )
435
+ raise NotImplementedError ("Conversion from format %s "
436
+ "is not implemented" % fmt )
436
437
437
438
conv_dates = Series (conv_dates , dtype = np .float64 )
438
439
missing_value = struct .unpack ('<d' , b'\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f ' )[0 ]
@@ -1709,7 +1710,7 @@ def _convert_datetime_to_stata_type(fmt):
1709
1710
"%tq" , "th" , "%th" , "ty" , "%ty" ]:
1710
1711
return np .float64 # Stata expects doubles for SIFs
1711
1712
else :
1712
- raise ValueError ( "fmt %s not understood " % fmt )
1713
+ raise NotImplementedError ( "Format %s not implemented " % fmt )
1713
1714
1714
1715
1715
1716
def _maybe_convert_to_int_keys (convert_dates , varlist ):
@@ -1721,9 +1722,8 @@ def _maybe_convert_to_int_keys(convert_dates, varlist):
1721
1722
new_dict .update ({varlist .index (key ): convert_dates [key ]})
1722
1723
else :
1723
1724
if not isinstance (key , int ):
1724
- raise ValueError (
1725
- "convert_dates key is not in varlist and is not an int"
1726
- )
1725
+ raise ValueError ("convert_dates key must be a "
1726
+ "column or an integer" )
1727
1727
new_dict .update ({key : convert_dates [key ]})
1728
1728
return new_dict
1729
1729
@@ -1763,8 +1763,7 @@ def _dtype_to_stata_type(dtype, column):
1763
1763
elif dtype == np .int8 :
1764
1764
return chr (251 )
1765
1765
else : # pragma : no cover
1766
- raise ValueError ("Data type %s not currently understood. "
1767
- "Please report an error to the developers." % dtype )
1766
+ raise NotImplementedError ("Data type %s not supported." % dtype )
1768
1767
1769
1768
1770
1769
def _dtype_to_default_stata_fmt (dtype , column ):
@@ -1801,35 +1800,36 @@ def _dtype_to_default_stata_fmt(dtype, column):
1801
1800
elif dtype == np .int8 or dtype == np .int16 :
1802
1801
return "%8.0g"
1803
1802
else : # pragma : no cover
1804
- raise ValueError ("Data type %s not currently understood. "
1805
- "Please report an error to the developers." % dtype )
1803
+ raise NotImplementedError ("Data type %s not supported." % dtype )
1806
1804
1807
1805
1808
1806
class StataWriter (StataParser ):
1809
1807
"""
1810
- A class for writing Stata binary dta files from array-like objects
1808
+ A class for writing Stata binary dta files
1811
1809
1812
1810
Parameters
1813
1811
----------
1814
- fname : file path or buffer
1815
- Where to save the dta file.
1816
- data : array-like
1817
- Array-like input to save. Pandas objects are also accepted.
1812
+ fname : str or buffer
1813
+ String path or file-like object
1814
+ data : DataFrame
1815
+ Input to save
1818
1816
convert_dates : dict
1819
- Dictionary mapping column of datetime types to the stata internal
1820
- format that you want to use for the dates. Options are
1821
- 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either a
1822
- number or a name.
1817
+ Dictionary mapping columns containing datetime types to Stata internal
1818
+ format to use when writing the dates. Options are 'tc', 'td', 'tm',
1819
+ 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name.
1820
+ Datetime columns that do not have a conversion type specified will be
1821
+ converted to 'tc'. Datetime columns with timezone information are not
1822
+ supported.
1823
+ write_index : bool
1824
+ Write the index to Stata dataset.
1823
1825
encoding : str
1824
- Default is latin-1. Note that Stata does not support unicode.
1826
+ Default is latin-1. Unicode is not supported
1825
1827
byteorder : str
1826
- Can be ">", "<", "little", or "big". The default is None which uses
1827
- `sys.byteorder`
1828
+ Can be ">", "<", "little", or "big". Default is `sys.byteorder`
1828
1829
time_stamp : datetime
1829
- A date time to use when writing the file. Can be None, in which
1830
- case the current time is used.
1830
+ A datetime to use as file creation date. Default is the current time
1831
1831
data_label : str
1832
- A label for the data set. Should be 80 characters or smaller.
1832
+ A label for the data set. Must be 80 characters or smaller.
1833
1833
1834
1834
.. versionadded:: 0.19.0
1835
1835
@@ -1843,6 +1843,17 @@ class StataWriter(StataParser):
1843
1843
The StataWriter instance has a write_file method, which will
1844
1844
write the file to the given `fname`.
1845
1845
1846
+ Raises
1847
+ ------
1848
+ NotImplementedError
1849
+ * If datetimes contain timezone information
1850
+ * Column dtype is not representable in Stata
1851
+ ValueError
1852
+ * Columns listed in convert_dates contain values other than
1853
+ datetime64[ns] or datetime.datetime
1854
+ * Column listed in convert_dates is not in DataFrame
1855
+ * Categorical label contains more than 32,000 characters
1856
+
1846
1857
Examples
1847
1858
--------
1848
1859
>>> import pandas as pd
@@ -1861,7 +1872,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
1861
1872
encoding = "latin-1" , byteorder = None , time_stamp = None ,
1862
1873
data_label = None , variable_labels = None ):
1863
1874
super (StataWriter , self ).__init__ (encoding )
1864
- self ._convert_dates = convert_dates
1875
+ self ._convert_dates = {} if convert_dates is None else convert_dates
1865
1876
self ._write_index = write_index
1866
1877
self ._time_stamp = time_stamp
1867
1878
self ._data_label = data_label
@@ -2041,15 +2052,22 @@ def _prepare_pandas(self, data):
2041
2052
self .varlist = data .columns .tolist ()
2042
2053
2043
2054
dtypes = data .dtypes
2044
- if self ._convert_dates is not None :
2045
- self ._convert_dates = _maybe_convert_to_int_keys (
2046
- self ._convert_dates , self .varlist
2055
+
2056
+ # Ensure all date columns are converted
2057
+ for col in data :
2058
+ if col in self ._convert_dates :
2059
+ continue
2060
+ if is_datetime64_dtype (data [col ]):
2061
+ self ._convert_dates [col ] = 'tc'
2062
+
2063
+ self ._convert_dates = _maybe_convert_to_int_keys (self ._convert_dates ,
2064
+ self .varlist )
2065
+ for key in self ._convert_dates :
2066
+ new_type = _convert_datetime_to_stata_type (
2067
+ self ._convert_dates [key ]
2047
2068
)
2048
- for key in self ._convert_dates :
2049
- new_type = _convert_datetime_to_stata_type (
2050
- self ._convert_dates [key ]
2051
- )
2052
- dtypes [key ] = np .dtype (new_type )
2069
+ dtypes [key ] = np .dtype (new_type )
2070
+
2053
2071
self .typlist = []
2054
2072
self .fmtlist = []
2055
2073
for col , dtype in dtypes .iteritems ():
0 commit comments