Skip to content

Commit 8361ca6

Browse files
Jacob Schaer, Pairing Station West
Jacob Schaer
authored and
Pairing Station West
committed
Added test and basic unicode support
Added network connectivity flag to test for unicode. Fixed documentation.
1 parent 403f778 commit 8361ca6

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

doc/source/v0.14.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ Enhancements
327327
- ``DataFrame.to_stata`` and ``StataWriter`` will accept keyword arguments time_stamp
328328
and data_label which allow the time stamp and dataset label to be set when creating a
329329
file. (:issue:`6545`)
330+
- ``pandas.io.gbq`` now handles reading unicode strings properly. (:issue:`5940`)
330331

331332
Performance
332333
~~~~~~~~~~~

pandas/io/gbq.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from datetime import datetime
1010
import pkg_resources
1111
from distutils.version import LooseVersion
12+
from pandas.compat import u
1213

1314
import pandas as pd
1415
import numpy as np
@@ -117,9 +118,8 @@ def _parse_entry(field_value, field_type):
117118
field_value = np.datetime64(timestamp)
118119
elif field_type == 'BOOLEAN':
119120
field_value = field_value == 'true'
120-
# Note that results are unicode, so this will
121-
# fail for non-ASCII characters.. this probably
122-
# functions differently in Python 3
121+
elif field_type == 'STRING':
122+
field_value = field_value
123123
else:
124124
field_value = str(field_value)
125125
return field_value

pandas/io/tests/test_gbq.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from pandas.core.frame import DataFrame
1313
from pandas.util.testing import with_connectivity_check
14+
from pandas.compat import u
1415
from pandas import NaT
1516

1617

@@ -193,9 +194,28 @@ def test_type_conversion(self):
193194
np.bool(False),
194195
np.int('2'),
195196
np.float('3.14159'),
196-
'Hello World']
197+
u('Hello World')]
197198
self.assertEqual(actual_output, sample_output, 'A format conversion failed')
198199

200+
@with_connectivity_check
201+
def test_unicode_string_conversion(self):
202+
# Strings from BigQuery Should be converted to UTF-8 properly
203+
204+
if not os.path.exists(self.bq_token):
205+
raise nose.SkipTest('Skipped because authentication information is not available.')
206+
207+
correct_test_datatype = DataFrame(
208+
{'UNICODE_STRING' : [u("\xe9\xfc")]}
209+
)
210+
211+
query = """SELECT '\xc3\xa9\xc3\xbc' as UNICODE_STRING"""
212+
213+
client = gbq._authenticate()
214+
a = gbq.read_gbq(query)
215+
tm.assert_frame_equal(a, correct_test_datatype)
216+
217+
218+
199219
def test_data_small(self):
200220
# Parsing a fixed page of data should return the proper fixed np.array()
201221
result_frame = gbq._parse_page(self.test_data_small,

0 commit comments

Comments
 (0)