From 55bf05ccb9fdf8a644ad3f4ea527c0e5ec193300 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Fri, 25 Nov 2016 16:03:36 +0300 Subject: [PATCH 01/26] Added udf_resource_uri parameter to read_gbq Now more complicated queries could be processed. --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/io/gbq.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 8e7e95c071ea4..fc668f7f90ced 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -379,6 +379,7 @@ Google BigQuery Enhancements - The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). - The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). +- The :func:`read_gbq` method now allows passing user defined functions .. 
_whatsnew_0190.errstate: diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 8038cc500f6cd..94d291d408cd0 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -375,7 +375,7 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError - def run_query(self, query): + def run_query(self, query, udf_resource_uri=None): try: from googleapiclient.errors import HttpError except: @@ -395,6 +395,14 @@ def run_query(self, query): } } } + + if udf_resource_uri is not None: + if not isinstance(udf_resource_uri, list): + udf_resource_uri = [udf_resource_uri] + + job_data['configuration']['query']['userDefinedFunctionResources'] = \ + [{'resourceUri': uri} for uri in udf_resource_uri] + self._start_timer() try: @@ -622,7 +630,7 @@ def _parse_entry(field_value, field_type): def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy'): + reauth=False, verbose=True, private_key=None, dialect='legacy', udf_resource_uri=None): """Load data from Google BigQuery. THIS IS AN EXPERIMENTAL LIBRARY @@ -682,6 +690,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 + udf_resource_uri : list(str) or str (optional) + A code resource to load from a Google Cloud Storage URI. + Describes user-defined function resources used in the query. + + .. 
versionadded:: 0.19.0 + Returns ------- df: DataFrame @@ -698,7 +712,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, private_key=private_key, dialect=dialect) - schema, pages = connector.run_query(query) + schema, pages = connector.run_query(query, udf_resource_uri) dataframe_list = [] while len(pages) > 0: page = pages.pop() From dad92883afe13262529a757e003fe81ba1aa0b07 Mon Sep 17 00:00:00 2001 From: necnec Date: Mon, 28 Nov 2016 12:37:31 +0300 Subject: [PATCH 02/26] Change parameter to kwargs --- pandas/io/gbq.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 94d291d408cd0..fd9c7a6244481 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -375,7 +375,7 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError - def run_query(self, query, udf_resource_uri=None): + def run_query(self, query, **kwargs): try: from googleapiclient.errors import HttpError except: @@ -395,13 +395,9 @@ def run_query(self, query, udf_resource_uri=None): } } } - - if udf_resource_uri is not None: - if not isinstance(udf_resource_uri, list): - udf_resource_uri = [udf_resource_uri] - - job_data['configuration']['query']['userDefinedFunctionResources'] = \ - [{'resourceUri': uri} for uri in udf_resource_uri] + query_config = kwargs.get('query_config') + if query_config is not None: + job_data['configuration']['query'].update(query_config) self._start_timer() @@ -629,8 +625,9 @@ def _parse_entry(field_value, field_type): return field_value -def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', udf_resource_uri=None): +def read_gbq(query, project_id=None, index_col=None, col_order=None, + reauth=False, verbose=True, private_key=None, dialect='legacy', + **kwargs): """Load data from Google BigQuery. 
THIS IS AN EXPERIMENTAL LIBRARY @@ -690,9 +687,10 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 - udf_resource_uri : list(str) or str (optional) - A code resource to load from a Google Cloud Storage URI. - Describes user-defined function resources used in the query. + **kwargs: Arbitrary keyword arguments + query_config (dict): query configuration parameters for job processing. + For more information see `BigQuery SQL Reference + ` .. versionadded:: 0.19.0 @@ -712,7 +710,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, private_key=private_key, dialect=dialect) - schema, pages = connector.run_query(query, udf_resource_uri) + schema, pages = connector.run_query(query, **kwargs) dataframe_list = [] while len(pages) > 0: page = pages.pop() From f9fae0c53a4b53533ed8fcae3b97ac0ab8ec4e3c Mon Sep 17 00:00:00 2001 From: necnec Date: Mon, 28 Nov 2016 15:28:51 +0300 Subject: [PATCH 03/26] Fix formatting --- pandas/io/gbq.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index fd9c7a6244481..fb66e9406c008 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -399,7 +399,6 @@ def run_query(self, query, **kwargs): if query_config is not None: job_data['configuration']['query'].update(query_config) - self._start_timer() try: self._print('Requesting query... ', end="") @@ -625,8 +624,8 @@ def _parse_entry(field_value, field_type): return field_value -def read_gbq(query, project_id=None, index_col=None, col_order=None, - reauth=False, verbose=True, private_key=None, dialect='legacy', +def read_gbq(query, project_id=None, index_col=None, col_order=None, + reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): """Load data from Google BigQuery. 
From c66169d424f932bb89371fc1a50a22e3ad0cb77f Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 29 Nov 2016 00:10:31 +0300 Subject: [PATCH 04/26] add read_gbq tests: query parameters and cache --- pandas/io/tests/test_gbq.py | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index f6ff35a6db0d1..70308dbfe4076 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -711,6 +711,53 @@ def test_invalid_option_for_sql_dialect(self): gbq.read_gbq(sql_statement, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) + def test_query_with_parameters(self): + sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + query_config = { + "useLegacySql":False, + "parameterMode":"named", + "queryParameters": [ + { + "name": "param1", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 1 + } + }, + { + "name": "param2", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 2 + } + } + ] + } + # Test that an invalid query without query_config + with tm.assertRaises(ValueError): + gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path()) + + # Test that a correct query with query config + df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path(), + query_config=query_config) + tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) + + def test_query_no_cache(self): + query = 'SELECT "PI" as VALID_STRING' + query_config = { + "useQueryCache":False, + } + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + query_config=query_config) + tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + class TestToGBQIntegration(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 From 
a96811d778b2bb965e352d0798a5c3c2fe014552 Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 29 Nov 2016 00:14:54 +0300 Subject: [PATCH 05/26] add unit tests read_gbq: query parameters, cache --- pandas/io/tests/test_gbq.py | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index f6ff35a6db0d1..70308dbfe4076 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -711,6 +711,53 @@ def test_invalid_option_for_sql_dialect(self): gbq.read_gbq(sql_statement, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) + def test_query_with_parameters(self): + sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + query_config = { + "useLegacySql":False, + "parameterMode":"named", + "queryParameters": [ + { + "name": "param1", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 1 + } + }, + { + "name": "param2", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 2 + } + } + ] + } + # Test that an invalid query without query_config + with tm.assertRaises(ValueError): + gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path()) + + # Test that a correct query with query config + df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path(), + query_config=query_config) + tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) + + def test_query_no_cache(self): + query = 'SELECT "PI" as VALID_STRING' + query_config = { + "useQueryCache":False, + } + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + query_config=query_config) + tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + class TestToGBQIntegration(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 From ad35a436494f1c2dc52e5a32e44cfed0c54d2c12 
Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 29 Nov 2016 00:20:12 +0300 Subject: [PATCH 06/26] fix whatsnew text --- doc/source/whatsnew/v0.19.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index fc668f7f90ced..d7efaa4b25c64 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -379,7 +379,7 @@ Google BigQuery Enhancements - The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). - The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). -- The :func:`read_gbq` method now allows passing user defined functions +- The :func:`read_gbq` method now allows query configuration preferences .. _whatsnew_0190.errstate: From 94fa51467ee85f3abfc8f5fb0ca4aecc337a9975 Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 29 Nov 2016 10:29:06 +0300 Subject: [PATCH 07/26] test formatting --- pandas/io/tests/test_gbq.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 70308dbfe4076..4b9039a2052d9 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -712,10 +712,10 @@ def test_invalid_option_for_sql_dialect(self): dialect='standard', private_key=_get_private_key_path()) def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" query_config = { - "useLegacySql":False, - "parameterMode":"named", + "useLegacySql": False, + "parameterMode": "named", "queryParameters": [ { "name": "param1", @@ -744,14 +744,14 @@ def test_query_with_parameters(self): # Test that a correct query with query config df = 
gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - query_config=query_config) + private_key=_get_private_key_path(), + query_config=query_config) tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) def test_query_no_cache(self): query = 'SELECT "PI" as VALID_STRING' query_config = { - "useQueryCache":False, + "useQueryCache": False, } df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path(), From d69ed7f261c0351c20cc4cf79c136e706ee36f93 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Tue, 29 Nov 2016 11:47:11 +0300 Subject: [PATCH 08/26] check tests --- pandas/io/tests/test_gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 4b9039a2052d9..344d4243d3819 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -707,7 +707,7 @@ def test_invalid_option_for_sql_dialect(self): private_key=_get_private_key_path()) # Test that a correct option for dialect succeeds - # to make sure ValueError was due to invalid dialect + # to make sure ValueError was due to invalid dialect gbq.read_gbq(sql_statement, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) From 640be7a5f3d925782ae50c5b62b50a03c3c1d2f8 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Wed, 30 Nov 2016 11:31:01 +0300 Subject: [PATCH 09/26] Change whatsnew 0.19.0->0.19.2 --- doc/source/whatsnew/v0.19.0.txt | 1 - doc/source/whatsnew/v0.19.2.txt | 7 +++++++ pandas/io/gbq.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index d7efaa4b25c64..8e7e95c071ea4 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -379,7 +379,6 @@ Google BigQuery Enhancements - The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL 
or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). - The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). -- The :func:`read_gbq` method now allows query configuration preferences .. _whatsnew_0190.errstate: diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 5a255d1e62043..07bb20c22b913 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -65,3 +65,10 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) + +.. _whatsnew_0192.gbq: + +Google BigQuery Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The :func:`read_gbq` method now allows query configuration preferences diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index fb66e9406c008..bdee637967944 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -691,7 +691,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, For more information see `BigQuery SQL Reference ` - .. versionadded:: 0.19.0 + .. versionadded:: 0.19.2 Returns ------- From b849300fa50f8b8173eecc6701572b1a2e21fa5f Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Wed, 30 Nov 2016 18:33:32 +0300 Subject: [PATCH 10/26] Change whatsnew 0.19.2 -> 0.20.0 --- doc/source/whatsnew/v0.19.2.txt | 7 ------- doc/source/whatsnew/v0.20.0.txt | 7 +++++++ pandas/io/gbq.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 07bb20c22b913..5a255d1e62043 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -65,10 +65,3 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) - -.. 
_whatsnew_0192.gbq: - -Google BigQuery Enhancements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- The :func:`read_gbq` method now allows query configuration preferences diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 03e0cae6cc83f..a01c5228bf081 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -83,3 +83,10 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + +.. _whatsnew_0200.gbq: + +Google BigQuery Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The :func:`read_gbq` method now allows query configuration preferences \ No newline at end of file diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index bdee637967944..452bac3e97f6b 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -691,7 +691,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, For more information see `BigQuery SQL Reference ` - .. versionadded:: 0.19.2 + .. versionadded:: 0.20.0 Returns ------- From a952710846ff1340bf1f0a664c5e0361f11ba8df Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Fri, 2 Dec 2016 14:46:47 +0300 Subject: [PATCH 11/26] Move whatsnew BQ Enhancements -> Enhancements --- doc/source/whatsnew/v0.20.0.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a01c5228bf081..f1a1310f2a901 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -31,6 +31,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) +- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences .. _whatsnew_0200.api_breaking: @@ -84,9 +85,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -.. 
_whatsnew_0200.gbq: -Google BigQuery Enhancements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- The :func:`read_gbq` method now allows query configuration preferences \ No newline at end of file From 0b365da6cc1dca3ce0b4b9a3e69521b31a8bdd3a Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Fri, 2 Dec 2016 14:48:45 +0300 Subject: [PATCH 12/26] delete newlines --- doc/source/whatsnew/v0.20.0.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f1a1310f2a901..9fde28e302b2c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -83,6 +83,4 @@ Performance Improvements .. _whatsnew_0200.bug_fixes: Bug Fixes -~~~~~~~~~ - - +~~~~~~~~~ \ No newline at end of file From c199935bc7ad0af757d21b490e78a186bd68b24e Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Mon, 5 Dec 2016 14:51:08 +0300 Subject: [PATCH 13/26] Make query configuration more general --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/io/gbq.py | 11 +++--- pandas/io/tests/test_gbq.py | 62 ++++++++++++++++++--------------- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9fde28e302b2c..ea02c1e79180f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -83,4 +83,4 @@ Performance Improvements .. 
_whatsnew_0200.bug_fixes: Bug Fixes -~~~~~~~~~ \ No newline at end of file +~~~~~~~~~ diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 452bac3e97f6b..aa4273745f5ff 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -395,9 +395,12 @@ def run_query(self, query, **kwargs): } } } - query_config = kwargs.get('query_config') - if query_config is not None: - job_data['configuration']['query'].update(query_config) + configuration = kwargs.get('configuration') + if configuration is not None: + if 'query' in configuration: + job_data['configuration']['query'].update(configuration['query']) + else: + job_data['configuration'] = configuration self._start_timer() try: @@ -687,7 +690,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 **kwargs: Arbitrary keyword arguments - query_config (dict): query configuration parameters for job processing. + configuration (dict): query configuration parameters for job processing. For more information see `BigQuery SQL Reference ` diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 344d4243d3819..e0c8a4eaaa0b3 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -713,31 +713,33 @@ def test_invalid_option_for_sql_dialect(self): def test_query_with_parameters(self): sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" - query_config = { - "useLegacySql": False, - "parameterMode": "named", - "queryParameters": [ - { - "name": "param1", - "parameterType": { - "type": "INTEGER" + config = { + 'query': { + "useLegacySql": False, + "parameterMode": "named", + "queryParameters": [ + { + "name": "param1", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 1 + } }, - "parameterValue": { - "value": 1 + { + "name": "param2", + "parameterType": { + "type": "INTEGER" + }, + "parameterValue": { + "value": 2 + } } - }, - { - "name": "param2", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - 
"value": 2 - } - } - ] + ] + } } - # Test that an invalid query without query_config + # Test that an invalid query without query config with tm.assertRaises(ValueError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) @@ -745,17 +747,21 @@ def test_query_with_parameters(self): # Test that a correct query with query config df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), - query_config=query_config) + configuration=config) tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) - def test_query_no_cache(self): + def test_query_inside_configuration(self): + query_no_use = 'SELECT "PI_WRONG" as VALID_STRING' query = 'SELECT "PI" as VALID_STRING' - query_config = { - "useQueryCache": False, + config = { + 'query': { + "query": query, + "useQueryCache": False, + } } - df = gbq.read_gbq(query, project_id=_get_project_id(), + df = gbq.read_gbq(query_no_use, project_id=_get_project_id(), private_key=_get_private_key_path(), - query_config=query_config) + configuration=config) tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) From 028c8bef6ebe44b29850a1f7c3b45c3ec68df9a7 Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 6 Dec 2016 01:07:27 +0300 Subject: [PATCH 14/26] Solve formating problems --- pandas/io/gbq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index aa4273745f5ff..0f9023586cfb5 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -398,7 +398,8 @@ def run_query(self, query, **kwargs): configuration = kwargs.get('configuration') if configuration is not None: if 'query' in configuration: - job_data['configuration']['query'].update(configuration['query']) + job_data['configuration']['query']\ + .update(configuration['query']) else: job_data['configuration'] = configuration @@ -690,7 +691,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. 
versionadded:: 0.19.0 **kwargs: Arbitrary keyword arguments - configuration (dict): query configuration parameters for job processing. + configuration (dict): query config parameters for job processing. For more information see `BigQuery SQL Reference ` From 395c0e95a9b1657e934664ec6b661b56541a8742 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Mon, 12 Dec 2016 14:14:41 +0300 Subject: [PATCH 15/26] fix formatting --- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 0f9023586cfb5..87da2433fcc4f 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -399,7 +399,7 @@ def run_query(self, query, **kwargs): if configuration is not None: if 'query' in configuration: job_data['configuration']['query']\ - .update(configuration['query']) + .update(configuration['query']) else: job_data['configuration'] = configuration From 8a38650e6bd56bafacc3ca69bb6046283ba62e1f Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Mon, 12 Dec 2016 16:57:54 +0300 Subject: [PATCH 16/26] Added example configuration & job_configuration refactoring --- pandas/io/gbq.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 87da2433fcc4f..49fcb378e0fbc 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -385,23 +385,25 @@ def run_query(self, query, **kwargs): _check_google_client_version() job_collection = self.service.jobs() - job_data = { - 'configuration': { - 'query': { - 'query': query, - 'useLegacySql': self.dialect == 'legacy' - # 'allowLargeResults', 'createDisposition', - # 'preserveNulls', destinationTable, useQueryCache - } + + job_config = { + 'query': { + 'query': query, + 'useLegacySql': self.dialect == 'legacy' + # 'allowLargeResults', 'createDisposition', + # 'preserveNulls', destinationTable, useQueryCache } } configuration = kwargs.get('configuration') if configuration is not None: if 'query' in configuration: - 
job_data['configuration']['query']\ - .update(configuration['query']) + job_config['query'].update(configuration['query']) else: - job_data['configuration'] = configuration + job_config = configuration + + job_data = { + 'configuration': job_config + } self._start_timer() try: @@ -692,6 +694,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, **kwargs: Arbitrary keyword arguments configuration (dict): query config parameters for job processing. + For example: + configuration = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference ` From 929ad1ae36f79740157cec439e431d3b57172c22 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Tue, 13 Dec 2016 10:45:26 +0300 Subject: [PATCH 17/26] formatting: delete whitespace --- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 49fcb378e0fbc..f98f91b251b3e 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -694,7 +694,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, **kwargs: Arbitrary keyword arguments configuration (dict): query config parameters for job processing. 
- For example: + For example: configuration = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference ` From 0ac26a29693133586ef38c0c421da663192f9df4 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Wed, 14 Dec 2016 14:32:10 +0300 Subject: [PATCH 18/26] added pull request number in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 487c227d52c11..0019d7a274822 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -50,7 +50,8 @@ Other enhancements - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) -- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences + +- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences (:issue:`14742`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack From 99521aa327a5552633154ce15cb107c583d61dc6 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Wed, 21 Dec 2016 14:48:30 +0300 Subject: [PATCH 19/26] Formatting, documentation, new unit test --- doc/source/io.rst | 14 +++++++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/io/gbq.py | 19 ++++++++++----- pandas/io/tests/test_gbq.py | 42 +++++++++++++++++++++++++++++---- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 75f36c5274cd2..cbd54b17f56b3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4562,6 +4562,20 @@ destination DataFrame as well as a preferred column order as follows: index_col='index_column_name', col_order=['col1', 'col2', 'col3'], projectid) + +You can specify the query configuration as parameter + +.. 
code-block:: python + + config = { + 'query': { + "useQueryCache": False + } + } + data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', + configuration=config, projectid) + + .. note:: You can find your project id in the `Google developers console `__. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0019d7a274822..4389dda44b077 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -51,7 +51,7 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) -- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences (:issue:`14742`) +- ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f98f91b251b3e..4782d8148556b 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -397,9 +397,14 @@ def run_query(self, query, **kwargs): configuration = kwargs.get('configuration') if configuration is not None: if 'query' in configuration: + if 'query' in configuration['query'] and query is not None: + raise ValueError("Query statement can't be specified \ + inside config while it is specified as parameter") + job_config['query'].update(configuration['query']) else: - job_config = configuration + raise ValueError("Only 'query' job type is supported") + #job_config = configuration job_data = { 'configuration': job_config @@ -633,7 +638,7 @@ def _parse_entry(field_value, field_type): def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): - """Load data from Google BigQuery. + r"""Load data from Google BigQuery. 
THIS IS AN EXPERIMENTAL LIBRARY @@ -692,11 +697,13 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 - **kwargs: Arbitrary keyword arguments + **kwargs : Arbitrary keyword arguments configuration (dict): query config parameters for job processing. - For example: - configuration = {'query': {'useQueryCache': False}} - For more information see `BigQuery SQL Reference + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery SQL Reference ` .. versionadded:: 0.20.0 diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index e0c8a4eaaa0b3..adf1f92a96225 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -739,12 +739,14 @@ def test_query_with_parameters(self): ] } } - # Test that an invalid query without query config + # Test that a query that relies on parameters fails + # when parameters are not supplied via configuration with tm.assertRaises(ValueError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) - # Test that a correct query with query config + # Test that the query is successful because we have supplied + # the correct query parameters via the 'configuration' option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) @@ -759,11 +761,41 @@ def test_query_inside_configuration(self): "useQueryCache": False, } } - df = gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) + # Test that it can't pass query both + # inside configuration and as parameter + with tm.assertRaises(ValueError): + gbq.read_gbq(query_no_use, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) + + df = gbq.read_gbq(None, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) 
tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + def test_configuration_without_query(self): + sql_statement = 'SELECT 1' + config = { + 'copy': { + "sourceTable": { + "projectId": _get_project_id(), + "datasetId": "publicdata:samples", + "tableId": "wikipedia" + }, + "destinationTable": { + "projectId": _get_project_id(), + "datasetId": "publicdata:samples", + "tableId": "wikipedia_copied" + }, + } + } + # Test that only 'query' configurations are supported + # nor 'copy','load','extract' + with tm.assertRaises(ValueError): + gbq.read_gbq(sql_statement, project_id=_get_project_id(), + private_key=_get_private_key_path(), + configuration=config) + class TestToGBQIntegration(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 From df5dec6ba1785c7953595011582e17e55d3fa68e Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Thu, 22 Dec 2016 11:21:42 +0300 Subject: [PATCH 20/26] configuration->config & formatting --- doc/source/io.rst | 4 ++-- pandas/io/gbq.py | 22 +++++++++++----------- pandas/io/tests/test_gbq.py | 18 +++++++++--------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index cbd54b17f56b3..5fc73de94068e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4563,7 +4563,7 @@ destination DataFrame as well as a preferred column order as follows: col_order=['col1', 'col2', 'col3'], projectid) -You can specify the query configuration as parameter +You can specify the query config as parameter .. code-block:: python @@ -4573,7 +4573,7 @@ You can specify the query configuration as parameter } } data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - configuration=config, projectid) + config=config, projectid) .. 
note:: diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 4782d8148556b..9942adb3fcc00 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -394,17 +394,17 @@ def run_query(self, query, **kwargs): # 'preserveNulls', destinationTable, useQueryCache } } - configuration = kwargs.get('configuration') - if configuration is not None: - if 'query' in configuration: - if 'query' in configuration['query'] and query is not None: - raise ValueError("Query statement can't be specified \ - inside config while it is specified as parameter") - - job_config['query'].update(configuration['query']) + config = kwargs.get('config') + if config is not None: + if 'query' in config: + if 'query' in config['query'] and query is not None: + raise ValueError("Query statement can't be specified " + "inside config while it is specified " + "as parameter") + + job_config['query'].update(config['query']) else: raise ValueError("Only 'query' job type is supported") - #job_config = configuration job_data = { 'configuration': job_config @@ -698,10 +698,10 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 **kwargs : Arbitrary keyword arguments - configuration (dict): query config parameters for job processing. + config (dict): query config parameters for job processing. 
For example: - configuration = {'query': {'useQueryCache': False}} + config = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference ` diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index adf1f92a96225..7116b9b2ab325 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -707,7 +707,7 @@ def test_invalid_option_for_sql_dialect(self): private_key=_get_private_key_path()) # Test that a correct option for dialect succeeds - # to make sure ValueError was due to invalid dialect + # to make sure ValueError was due to invalid dialect gbq.read_gbq(sql_statement, project_id=_get_project_id(), dialect='standard', private_key=_get_private_key_path()) @@ -746,10 +746,10 @@ def test_query_with_parameters(self): private_key=_get_private_key_path()) # Test that the query is successful because we have supplied - # the correct query parameters via the 'configuration' option + # the correct query parameters via the 'config' option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), - configuration=config) + config=config) tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) def test_query_inside_configuration(self): @@ -762,15 +762,15 @@ def test_query_inside_configuration(self): } } # Test that it can't pass query both - # inside configuration and as parameter + # inside config and as parameter with tm.assertRaises(ValueError): gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) + private_key=_get_private_key_path(), + config=config) df = gbq.read_gbq(None, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) + private_key=_get_private_key_path(), + config=config) tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) def test_configuration_without_query(self): @@ -794,7 +794,7 @@ def test_configuration_without_query(self): with 
tm.assertRaises(ValueError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), - configuration=config) + config=config) class TestToGBQIntegration(tm.TestCase): From 8720b03eec372860001bffe9fa8a1b5dd80a9f17 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Thu, 22 Dec 2016 16:33:11 +0300 Subject: [PATCH 21/26] Delete trailing whitespaces --- pandas/io/tests/test_gbq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 7116b9b2ab325..ce99290578896 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -739,13 +739,13 @@ def test_query_with_parameters(self): ] } } - # Test that a query that relies on parameters fails + # Test that a query that relies on parameters fails # when parameters are not supplied via configuration with tm.assertRaises(ValueError): gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path()) - # Test that the query is successful because we have supplied + # Test that the query is successful because we have supplied # the correct query parameters via the 'config' option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), @@ -761,7 +761,7 @@ def test_query_inside_configuration(self): "useQueryCache": False, } } - # Test that it can't pass query both + # Test that it can't pass query both # inside config and as parameter with tm.assertRaises(ValueError): gbq.read_gbq(query_no_use, project_id=_get_project_id(), From ec590af0ba8f2bc0cb40ade8faebdf6e2908ed77 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Thu, 29 Dec 2016 15:15:13 +0300 Subject: [PATCH 22/26] Throw exception if more than 1 job type in config --- pandas/io/gbq.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 9942adb3fcc00..40a3c97b0f6bf 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -396,6 +396,9 @@ 
def run_query(self, query, **kwargs): } config = kwargs.get('config') if config is not None: + if len(config) != 1: + raise ValueError("Only one job type must be specified, " + "but given {}".format(','.join(config.keys())) if 'query' in config: if 'query' in config['query'] and query is not None: raise ValueError("Query statement can't be specified " From e2f801f83a8d9d12b407d90bc8b0384adf4d1867 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Thu, 29 Dec 2016 17:13:02 +0300 Subject: [PATCH 23/26] hotfix --- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 40a3c97b0f6bf..fb965ac0c4dc0 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -398,7 +398,7 @@ def run_query(self, query, **kwargs): if config is not None: if len(config) != 1: raise ValueError("Only one job type must be specified, " - "but given {}".format(','.join(config.keys())) + "but given {}".format(','.join(config.keys()))) if 'query' in config: if 'query' in config['query'] and query is not None: raise ValueError("Query statement can't be specified " From b97a1bea30ef0f8069fbd31e9b51d8a1d95fbda7 Mon Sep 17 00:00:00 2001 From: Dmitry L Date: Fri, 30 Dec 2016 11:37:27 +0300 Subject: [PATCH 24/26] formatting --- pandas/io/gbq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index fb965ac0c4dc0..92f3624d750e4 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -397,8 +397,8 @@ def run_query(self, query, **kwargs): config = kwargs.get('config') if config is not None: if len(config) != 1: - raise ValueError("Only one job type must be specified, " - "but given {}".format(','.join(config.keys()))) + raise ValueError("Only one job type must be specified, but " + "given {}".format(','.join(config.keys()))) if 'query' in config: if 'query' in config['query'] and query is not None: raise ValueError("Query statement can't be specified " From 
82f44098cbd4443a3891c913f6eaf5e327abdc01 Mon Sep 17 00:00:00 2001 From: necnec Date: Mon, 2 Jan 2017 21:12:52 +0300 Subject: [PATCH 25/26] Add some documentation & formatting --- doc/source/io.rst | 4 +++- pandas/io/gbq.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5030eb8d281fd..417206c584f35 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4650,7 +4650,9 @@ destination DataFrame as well as a preferred column order as follows: col_order=['col1', 'col2', 'col3'], projectid) -You can specify the query config as parameter +Starting with 0.20.0, you can specify the query config as parameter to use additional options of your job. +For more information about query configuration parameters see +`here `__. .. code-block:: python diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 92f3624d750e4..fcf840a2e92b9 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -707,7 +707,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, config = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference - ` + ` .. versionadded:: 0.20.0 From 3a238a540d7e6843ecb52df8f157d6e3125117f5 Mon Sep 17 00:00:00 2001 From: necnec Date: Tue, 3 Jan 2017 13:38:40 +0300 Subject: [PATCH 26/26] config->configuration --- doc/source/io.rst | 4 ++-- pandas/io/gbq.py | 6 +++--- pandas/io/tests/test_gbq.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 417206c584f35..fad20e0a18659 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4656,13 +4656,13 @@ For more information about query configuration parameters see .. code-block:: python - config = { + configuration = { 'query': { "useQueryCache": False } } data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - config=config, projectid) + configuration=configuration, projectid) .. 
note:: diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index fcf840a2e92b9..966f53e9d75ef 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -394,7 +394,7 @@ def run_query(self, query, **kwargs): # 'preserveNulls', destinationTable, useQueryCache } } - config = kwargs.get('config') + config = kwargs.get('configuration') if config is not None: if len(config) != 1: raise ValueError("Only one job type must be specified, but " @@ -701,10 +701,10 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, .. versionadded:: 0.19.0 **kwargs : Arbitrary keyword arguments - config (dict): query config parameters for job processing. + configuration (dict): query config parameters for job processing. For example: - config = {'query': {'useQueryCache': False}} + configuration = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference ` diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 36d1f1e85d080..ae829f8e0a878 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -749,7 +749,7 @@ def test_query_with_parameters(self): # the correct query parameters via the 'config' option df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), - config=config) + configuration=config) tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) def test_query_inside_configuration(self): @@ -766,11 +766,11 @@ def test_query_inside_configuration(self): with tm.assertRaises(ValueError): gbq.read_gbq(query_no_use, project_id=_get_project_id(), private_key=_get_private_key_path(), - config=config) + configuration=config) df = gbq.read_gbq(None, project_id=_get_project_id(), private_key=_get_private_key_path(), - config=config) + configuration=config) tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) def test_configuration_without_query(self): @@ -794,7 +794,7 @@ def test_configuration_without_query(self): with tm.assertRaises(ValueError): 
gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), - config=config) + configuration=config) class TestToGBQIntegration(tm.TestCase):