Skip to content

Commit 99521aa

Browse files
committed
Formatting, documentation, new unit test
1 parent 0ac26a2 commit 99521aa

File tree

4 files changed

+65
-12
lines changed

4 files changed

+65
-12
lines changed

doc/source/io.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4562,6 +4562,20 @@ destination DataFrame as well as a preferred column order as follows:
45624562
index_col='index_column_name',
45634563
col_order=['col1', 'col2', 'col3'], project_id=projectid)
45644564
4565+
4566+
You can specify the query configuration as a parameter:
4567+
4568+
.. code-block:: python
4569+
4570+
config = {
4571+
'query': {
4572+
"useQueryCache": False
4573+
}
4574+
}
4575+
data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table',
4576+
configuration=config, project_id=projectid)
4577+
4578+
45654579
.. note::
45664580

45674581
You can find your project id in the `Google developers console <https://console.developers.google.com>`__.

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Other enhancements
5151

5252
- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
5353

54-
- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences (:issue:`14742`)
54+
- ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`)
5555

5656
- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
5757
unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack

pandas/io/gbq.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -397,9 +397,14 @@ def run_query(self, query, **kwargs):
397397
configuration = kwargs.get('configuration')
398398
if configuration is not None:
399399
if 'query' in configuration:
400+
if 'query' in configuration['query'] and query is not None:
401+
raise ValueError("Query statement can't be specified \
402+
inside config while it is specified as parameter")
403+
400404
job_config['query'].update(configuration['query'])
401405
else:
402-
job_config = configuration
406+
raise ValueError("Only 'query' job type is supported")
407+
#job_config = configuration
403408

404409
job_data = {
405410
'configuration': job_config
@@ -633,7 +638,7 @@ def _parse_entry(field_value, field_type):
633638
def read_gbq(query, project_id=None, index_col=None, col_order=None,
634639
reauth=False, verbose=True, private_key=None, dialect='legacy',
635640
**kwargs):
636-
"""Load data from Google BigQuery.
641+
r"""Load data from Google BigQuery.
637642
638643
THIS IS AN EXPERIMENTAL LIBRARY
639644
@@ -692,11 +697,13 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
692697
693698
.. versionadded:: 0.19.0
694699
695-
**kwargs: Arbitrary keyword arguments
700+
**kwargs : Arbitrary keyword arguments
696701
configuration (dict): query config parameters for job processing.
697-
For example:
698-
configuration = {'query': {'useQueryCache': False}}
699-
For more information see `BigQuery SQL Reference
702+
For example:
703+
704+
configuration = {'query': {'useQueryCache': False}}
705+
706+
For more information see `BigQuery SQL Reference
700707
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__
701708
702709
.. versionadded:: 0.20.0

pandas/io/tests/test_gbq.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -739,12 +739,14 @@ def test_query_with_parameters(self):
739739
]
740740
}
741741
}
742-
# Test that an invalid query without query config
742+
# Test that a query that relies on parameters fails
743+
# when parameters are not supplied via configuration
743744
with tm.assertRaises(ValueError):
744745
gbq.read_gbq(sql_statement, project_id=_get_project_id(),
745746
private_key=_get_private_key_path())
746747

747-
# Test that a correct query with query config
748+
# Test that the query is successful because we have supplied
749+
# the correct query parameters via the 'configuration' option
748750
df = gbq.read_gbq(sql_statement, project_id=_get_project_id(),
749751
private_key=_get_private_key_path(),
750752
configuration=config)
@@ -759,11 +761,41 @@ def test_query_inside_configuration(self):
759761
"useQueryCache": False,
760762
}
761763
}
762-
df = gbq.read_gbq(query_no_use, project_id=_get_project_id(),
763-
private_key=_get_private_key_path(),
764-
configuration=config)
764+
# Test that the query can't be passed both
765+
# inside the configuration and as a parameter
766+
with tm.assertRaises(ValueError):
767+
gbq.read_gbq(query_no_use, project_id=_get_project_id(),
768+
private_key=_get_private_key_path(),
769+
configuration=config)
770+
771+
df = gbq.read_gbq(None, project_id=_get_project_id(),
772+
private_key=_get_private_key_path(),
773+
configuration=config)
765774
tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
766775

776+
def test_configuration_without_query(self):
777+
sql_statement = 'SELECT 1'
778+
config = {
779+
'copy': {
780+
"sourceTable": {
781+
"projectId": _get_project_id(),
782+
"datasetId": "publicdata:samples",
783+
"tableId": "wikipedia"
784+
},
785+
"destinationTable": {
786+
"projectId": _get_project_id(),
787+
"datasetId": "publicdata:samples",
788+
"tableId": "wikipedia_copied"
789+
},
790+
}
791+
}
792+
# Test that only 'query' configurations are supported
793+
# not 'copy', 'load' or 'extract'
794+
with tm.assertRaises(ValueError):
795+
gbq.read_gbq(sql_statement, project_id=_get_project_id(),
796+
private_key=_get_private_key_path(),
797+
configuration=config)
798+
767799

768800
class TestToGBQIntegration(tm.TestCase):
769801
# Changes to BigQuery table schema may take up to 2 minutes as of May 2015

0 commit comments

Comments
 (0)