File tree 3 files changed +25
-4
lines changed
3 files changed +25
-4
lines changed Original file line number Diff line number Diff line change @@ -327,6 +327,7 @@ Enhancements
327
327
- ``DataFrame.to_stata`` and ``StataWriter`` will accept keyword arguments time_stamp
328
328
and data_label which allow the time stamp and dataset label to be set when creating a
329
329
file. (:issue:`6545`)
330
+ - ``pandas.io.gbq`` now handles reading unicode strings properly. (:issue:`5940`)
330
331
331
332
Performance
332
333
~~~~~~~~~~~
Original file line number Diff line number Diff line change 9
9
from datetime import datetime
10
10
import pkg_resources
11
11
from distutils .version import LooseVersion
12
+ from pandas .compat import u
12
13
13
14
import pandas as pd
14
15
import numpy as np
@@ -117,9 +118,8 @@ def _parse_entry(field_value, field_type):
117
118
field_value = np .datetime64 (timestamp )
118
119
elif field_type == 'BOOLEAN' :
119
120
field_value = field_value == 'true'
120
- # Note that results are unicode, so this will
121
- # fail for non-ASCII characters.. this probably
122
- # functions differently in Python 3
121
+ elif field_type == 'STRING' :
122
+ field_value = field_value
123
123
else :
124
124
field_value = str (field_value )
125
125
return field_value
Original file line number Diff line number Diff line change 11
11
12
12
from pandas .core .frame import DataFrame
13
13
from pandas .util .testing import with_connectivity_check
14
+ from pandas .compat import u
14
15
from pandas import NaT
15
16
16
17
@@ -193,9 +194,28 @@ def test_type_conversion(self):
193
194
np .bool (False ),
194
195
np .int ('2' ),
195
196
np .float ('3.14159' ),
196
- 'Hello World' ]
197
+ u ( 'Hello World' ) ]
197
198
self .assertEqual (actual_output , sample_output , 'A format conversion failed' )
198
199
200
+ @with_connectivity_check
201
+ def test_unicode_string_conversion (self ):
202
+ # Strings from BigQuery should be converted to UTF-8 properly
203
+
204
+ if not os .path .exists (self .bq_token ):
205
+ raise nose .SkipTest ('Skipped because authentication information is not available.' )
206
+
207
+ correct_test_datatype = DataFrame (
208
+ {'UNICODE_STRING' : [u ("\xe9 \xfc " )]}
209
+ )
210
+
211
+ query = """SELECT '\xc3 \xa9 \xc3 \xbc ' as UNICODE_STRING"""
212
+
213
+ client = gbq ._authenticate ()
214
+ a = gbq .read_gbq (query )
215
+ tm .assert_frame_equal (a , correct_test_datatype )
216
+
217
+
218
+
199
219
def test_data_small (self ):
200
220
# Parsing a fixed page of data should return the proper fixed np.array()
201
221
result_frame = gbq ._parse_page (self .test_data_small ,
You can’t perform that action at this time.
0 commit comments