Formatting, documentation, new unit test

necnec · necnec · commit 99521aa327a5 · 2016-12-21T14:48:30.000+03:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -4562,6 +4562,20 @@ destination DataFrame as well as a preferred column order as follows:
                              index_col='index_column_name',
                              col_order=['col1', 'col2', 'col3'], projectid)
 
+
+You can specify the query configuration as a parameter:
+
+.. code-block:: python
+
+   config = {
+      'query': {
+        "useQueryCache": False
+      }
+   }
+   data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table',
+                             configuration=config, project_id=projectid)
+
+
 .. note::
 
    You can find your project id in the `Google developers console <https://console.developers.google.com>`__.
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -51,7 +51,7 @@ Other enhancements
 
 - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
 
-- ``pd.io.gbq.read_gbq`` method now allows query configuration preferences (:issue:`14742`)
+- ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`)
 
 - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
   unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
@@ -397,9 +397,14 @@ def run_query(self, query, **kwargs):
         configuration = kwargs.get('configuration')
         if configuration is not None:
             if 'query' in configuration:
+                if 'query' in configuration['query'] and query is not None:
+                    raise ValueError("Query statement can't be specified \
+                        inside config while it is specified as parameter")
+
                 job_config['query'].update(configuration['query'])
             else:
-                job_config = configuration
+                raise ValueError("Only 'query' job type is supported")
+                #job_config = configuration
 
         job_data = {
             'configuration': job_config
@@ -633,7 +638,7 @@ def _parse_entry(field_value, field_type):
 def read_gbq(query, project_id=None, index_col=None, col_order=None,
              reauth=False, verbose=True, private_key=None, dialect='legacy',
              **kwargs):
-    """Load data from Google BigQuery.
+    r"""Load data from Google BigQuery.
 
     THIS IS AN EXPERIMENTAL LIBRARY
 
@@ -692,11 +697,13 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
 
         .. versionadded:: 0.19.0
 
-    **kwargs: Arbitrary keyword arguments
+    **kwargs : Arbitrary keyword arguments
         configuration (dict): query config parameters for job processing.
-            For example:
-                configuration = {'query': {'useQueryCache': False}}
-            For more information see `BigQuery SQL Reference
+        For example:
+
+            configuration = {'query': {'useQueryCache': False}}
+
+        For more information see `BigQuery SQL Reference
             <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`
 
         .. versionadded:: 0.20.0
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
@@ -739,12 +739,14 @@ def test_query_with_parameters(self):
                 ]
             }
         }
-        # Test that an invalid query without query config
+        # Test that a query that relies on parameters fails
+        # when parameters are not supplied via configuration
         with tm.assertRaises(ValueError):
             gbq.read_gbq(sql_statement, project_id=_get_project_id(),
                          private_key=_get_private_key_path())
 
-        # Test that a correct query with query config
+        # Test that the query is successful because we have supplied
+        # the correct query parameters via the 'configuration' option
         df = gbq.read_gbq(sql_statement, project_id=_get_project_id(),
                           private_key=_get_private_key_path(),
                           configuration=config)
@@ -759,11 +761,41 @@ def test_query_inside_configuration(self):
                 "useQueryCache": False,
             }
         }
-        df = gbq.read_gbq(query_no_use, project_id=_get_project_id(),
-                          private_key=_get_private_key_path(),
-                          configuration=config)
+        # Test that the query can't be passed both
+        # inside the configuration and as a parameter
+        with tm.assertRaises(ValueError):
+            gbq.read_gbq(query_no_use, project_id=_get_project_id(),
+                              private_key=_get_private_key_path(),
+                              configuration=config)
+
+        df = gbq.read_gbq(None, project_id=_get_project_id(),
+                              private_key=_get_private_key_path(),
+                              configuration=config)
         tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']}))
 
+    def test_configuration_without_query(self):
+        sql_statement = 'SELECT 1'
+        config = {
+            'copy': {
+                "sourceTable": {
+                    "projectId": _get_project_id(),
+                    "datasetId": "publicdata:samples",
+                    "tableId": "wikipedia"
+                },
+                "destinationTable": {
+                    "projectId": _get_project_id(),
+                    "datasetId": "publicdata:samples",
+                    "tableId": "wikipedia_copied"
+                },
+            }
+        }
+        # Test that only 'query' configurations are supported,
+        # not 'copy', 'load' or 'extract'
+        with tm.assertRaises(ValueError):
+            gbq.read_gbq(sql_statement, project_id=_get_project_id(),
+                         private_key=_get_private_key_path(),
+                         configuration=config)
+
 
 class TestToGBQIntegration(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015