5
5
import codecs
6
6
import csv
7
7
from datetime import datetime
8
- from io import BytesIO , StringIO
8
+ from io import StringIO
9
9
import os
10
10
import platform
11
11
from tempfile import TemporaryFile
@@ -69,17 +69,6 @@ def _set_noconvert_columns(self):
69
69
tm .assert_frame_equal (result , expected )
70
70
71
71
72
- def test_bytes_io_input (all_parsers ):
73
- encoding = "cp1255"
74
- parser = all_parsers
75
-
76
- data = BytesIO ("שלום:1234\n 562:123" .encode (encoding ))
77
- result = parser .read_csv (data , sep = ":" , encoding = encoding )
78
-
79
- expected = DataFrame ([[562 , 123 ]], columns = ["שלום" , "1234" ])
80
- tm .assert_frame_equal (result , expected )
81
-
82
-
83
72
def test_empty_decimal_marker (all_parsers ):
84
73
data = """A|B|C
85
74
1|2,334|5
@@ -316,15 +305,6 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path):
316
305
tm .assert_frame_equal (result , expected )
317
306
318
307
319
- def test_read_csv_unicode (all_parsers ):
320
- parser = all_parsers
321
- data = BytesIO ("\u0141 aski, Jan;1" .encode ("utf-8" ))
322
-
323
- result = parser .read_csv (data , sep = ";" , encoding = "utf-8" , header = None )
324
- expected = DataFrame ([["\u0141 aski, Jan" , 1 ]])
325
- tm .assert_frame_equal (result , expected )
326
-
327
-
328
308
def test_read_csv_wrong_num_columns (all_parsers ):
329
309
# Too few columns.
330
310
data = """A,B,C,D,E,F
@@ -1064,59 +1044,6 @@ def test_skip_initial_space(all_parsers):
1064
1044
tm .assert_frame_equal (result , expected )
1065
1045
1066
1046
1067
- @pytest .mark .parametrize ("sep" , ["," , "\t " ])
1068
- @pytest .mark .parametrize ("encoding" , ["utf-16" , "utf-16le" , "utf-16be" ])
1069
- def test_utf16_bom_skiprows (all_parsers , sep , encoding ):
1070
- # see gh-2298
1071
- parser = all_parsers
1072
- data = """skip this
1073
- skip this too
1074
- A,B,C
1075
- 1,2,3
1076
- 4,5,6""" .replace (
1077
- "," , sep
1078
- )
1079
- path = "__{}__.csv" .format (tm .rands (10 ))
1080
- kwargs = dict (sep = sep , skiprows = 2 )
1081
- utf8 = "utf-8"
1082
-
1083
- with tm .ensure_clean (path ) as path :
1084
- from io import TextIOWrapper
1085
-
1086
- bytes_data = data .encode (encoding )
1087
-
1088
- with open (path , "wb" ) as f :
1089
- f .write (bytes_data )
1090
-
1091
- bytes_buffer = BytesIO (data .encode (utf8 ))
1092
- bytes_buffer = TextIOWrapper (bytes_buffer , encoding = utf8 )
1093
-
1094
- result = parser .read_csv (path , encoding = encoding , ** kwargs )
1095
- expected = parser .read_csv (bytes_buffer , encoding = utf8 , ** kwargs )
1096
-
1097
- bytes_buffer .close ()
1098
- tm .assert_frame_equal (result , expected )
1099
-
1100
-
1101
- def test_utf16_example (all_parsers , csv_dir_path ):
1102
- path = os .path .join (csv_dir_path , "utf16_ex.txt" )
1103
- parser = all_parsers
1104
- result = parser .read_csv (path , encoding = "utf-16" , sep = "\t " )
1105
- assert len (result ) == 50
1106
-
1107
-
1108
- def test_unicode_encoding (all_parsers , csv_dir_path ):
1109
- path = os .path .join (csv_dir_path , "unicode_series.csv" )
1110
- parser = all_parsers
1111
-
1112
- result = parser .read_csv (path , header = None , encoding = "latin-1" )
1113
- result = result .set_index (0 )
1114
- got = result [1 ][1632 ]
1115
-
1116
- expected = "\xc1 k\xf6 ldum klaka (Cold Fever) (1994)"
1117
- assert got == expected
1118
-
1119
-
1120
1047
def test_trailing_delimiters (all_parsers ):
1121
1048
# see gh-2442
1122
1049
data = """A,B,C
@@ -1915,39 +1842,6 @@ def test_null_byte_char(all_parsers):
1915
1842
parser .read_csv (StringIO (data ), names = names )
1916
1843
1917
1844
1918
- @pytest .mark .parametrize (
1919
- "data,kwargs,expected" ,
1920
- [
1921
- # Basic test
1922
- ("a\n 1" , dict (), DataFrame ({"a" : [1 ]})),
1923
- # "Regular" quoting
1924
- ('"a"\n 1' , dict (quotechar = '"' ), DataFrame ({"a" : [1 ]})),
1925
- # Test in a data row instead of header
1926
- ("b\n 1" , dict (names = ["a" ]), DataFrame ({"a" : ["b" , "1" ]})),
1927
- # Test in empty data row with skipping
1928
- ("\n 1" , dict (names = ["a" ], skip_blank_lines = True ), DataFrame ({"a" : [1 ]})),
1929
- # Test in empty data row without skipping
1930
- (
1931
- "\n 1" ,
1932
- dict (names = ["a" ], skip_blank_lines = False ),
1933
- DataFrame ({"a" : [np .nan , 1 ]}),
1934
- ),
1935
- ],
1936
- )
1937
- def test_utf8_bom (all_parsers , data , kwargs , expected ):
1938
- # see gh-4793
1939
- parser = all_parsers
1940
- bom = "\ufeff "
1941
- utf8 = "utf-8"
1942
-
1943
- def _encode_data_with_bom (_data ):
1944
- bom_data = (bom + _data ).encode (utf8 )
1945
- return BytesIO (bom_data )
1946
-
1947
- result = parser .read_csv (_encode_data_with_bom (data ), encoding = utf8 , ** kwargs )
1948
- tm .assert_frame_equal (result , expected )
1949
-
1950
-
1951
1845
def test_temporary_file (all_parsers ):
1952
1846
# see gh-13398
1953
1847
parser = all_parsers
@@ -1965,20 +1859,6 @@ def test_temporary_file(all_parsers):
1965
1859
tm .assert_frame_equal (result , expected )
1966
1860
1967
1861
1968
- @pytest .mark .parametrize ("byte" , [8 , 16 ])
1969
- @pytest .mark .parametrize ("fmt" , ["utf-{0}" , "utf_{0}" , "UTF-{0}" , "UTF_{0}" ])
1970
- def test_read_csv_utf_aliases (all_parsers , byte , fmt ):
1971
- # see gh-13549
1972
- expected = DataFrame ({"mb_num" : [4.8 ], "multibyte" : ["test" ]})
1973
- parser = all_parsers
1974
-
1975
- encoding = fmt .format (byte )
1976
- data = "mb_num,multibyte\n 4.8,test" .encode (encoding )
1977
-
1978
- result = parser .read_csv (BytesIO (data ), encoding = encoding )
1979
- tm .assert_frame_equal (result , expected )
1980
-
1981
-
1982
1862
def test_internal_eof_byte (all_parsers ):
1983
1863
# see gh-5500
1984
1864
parser = all_parsers
@@ -2038,30 +1918,6 @@ def test_file_handles_with_open(all_parsers, csv1):
2038
1918
assert not f .closed
2039
1919
2040
1920
2041
- @pytest .mark .parametrize (
2042
- "fname,encoding" ,
2043
- [
2044
- ("test1.csv" , "utf-8" ),
2045
- ("unicode_series.csv" , "latin-1" ),
2046
- ("sauron.SHIFT_JIS.csv" , "shiftjis" ),
2047
- ],
2048
- )
2049
- def test_binary_mode_file_buffers (all_parsers , csv_dir_path , fname , encoding ):
2050
- # gh-23779: Python csv engine shouldn't error on files opened in binary.
2051
- parser = all_parsers
2052
-
2053
- fpath = os .path .join (csv_dir_path , fname )
2054
- expected = parser .read_csv (fpath , encoding = encoding )
2055
-
2056
- with open (fpath , mode = "r" , encoding = encoding ) as fa :
2057
- result = parser .read_csv (fa )
2058
- tm .assert_frame_equal (expected , result )
2059
-
2060
- with open (fpath , mode = "rb" ) as fb :
2061
- result = parser .read_csv (fb , encoding = encoding )
2062
- tm .assert_frame_equal (expected , result )
2063
-
2064
-
2065
1921
def test_invalid_file_buffer_class (all_parsers ):
2066
1922
# see gh-15337
2067
1923
class InvalidBuffer :
0 commit comments