From a62b19039cc48bab34d67d6393db4e6384a81ffd Mon Sep 17 00:00:00 2001
From: John Paton
Date: Tue, 12 Mar 2019 17:52:08 +0100
Subject: [PATCH 1/6] ENH: Allow partial table schema in to_gbq

---
 pandas_gbq/gbq.py         | 11 +++++++++--
 pandas_gbq/schema.py      | 30 ++++++++++++++++++++++++++++++
 tests/unit/test_schema.py | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index b59c3f94..1b803880 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -1023,10 +1023,11 @@ def to_gbq(
         credentials=connector.credentials,
     )
 
+    default_schema = _generate_bq_schema(dataframe)
     if not table_schema:
-        table_schema = _generate_bq_schema(dataframe)
+        table_schema = default_schema
     else:
-        table_schema = dict(fields=table_schema)
+        table_schema = _update_bq_schema(default_schema, dict(fields=table_schema))
 
     # If table exists, check if_exists parameter
     if table.exists(table_id):
@@ -1091,6 +1092,12 @@ def _generate_bq_schema(df, default_type="STRING"):
     return schema.generate_bq_schema(df, default_type=default_type)
 
 
+def _update_bq_schema(schema_old, schema_new):
+    from pandas_gbq import schema
+
+    return schema.update_schema(schema_old, schema_new)
+
+
 class _Table(GbqConnector):
     def __init__(
         self,
diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py
index 3ca03025..e4e18b02 100644
--- a/pandas_gbq/schema.py
+++ b/pandas_gbq/schema.py
@@ -31,3 +31,33 @@ def generate_bq_schema(dataframe, default_type="STRING"):
         )
 
     return {"fields": fields}
+
+
+def update_schema(schema_old, schema_new):
+    """
+    Given an old BigQuery schema, update it with a new one.
+
+    Where a field name is the same, the new will replace the old. Any
+    new fields not present in the old schema will be added.
+
+    Arguments:
+        schema_old: the old schema to update
+        schema_new: the new schema which will overwrite/extend the old
+    """
+    old_fields = schema_old["fields"]
+    new_fields = schema_new["fields"]
+    output_fields = old_fields.copy()
+
+    field_indices = {field["name"]: i for i, field in enumerate(output_fields)}
+
+    for field in new_fields:
+        name = field["name"]
+        if name in field_indices:
+            # replace old field with new field of same name
+            output_fields[field_indices[name]] = field
+        else:
+            # add new field
+            output_fields.append(field)
+
+    return {"fields": output_fields}
+
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 74f22f29..e2747580 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -54,3 +54,40 @@ def test_generate_bq_schema(dataframe, expected_schema):
     schema = pandas_gbq.schema.generate_bq_schema(dataframe)
 
     assert schema == expected_schema
+
+@pytest.mark.parametrize(
+    "schema_old,schema_new,expected_output",
+    [
+        (
+            {"fields": [{"name": "col1", "type": "INTEGER"}]},
+            {"fields": [{"name": "col2", "type": "TIMESTAMP"}]},
+            {"fields": [
+                {"name": "col1", "type": "INTEGER"},
+                {"name": "col2", "type": "TIMESTAMP"}
+            ]},
+        ),
+        (
+            {"fields": [{"name": "col1", "type": "INTEGER"}]},
+            {"fields": [{"name": "col1", "type": "BOOLEAN"}]},
+            {"fields": [{"name": "col1", "type": "BOOLEAN"}]},
+        ),
+        (
+            {"fields": [
+                {"name": "col1", "type": "INTEGER"},
+                {"name": "col2", "type": "INTEGER"}
+            ]},
+            {"fields": [
+                {"name": "col2", "type": "BOOLEAN"},
+                {"name": "col3", "type": "FLOAT"}
+            ]},
+            {"fields": [
+                {"name": "col1", "type": "INTEGER"},
+                {"name": "col2", "type": "BOOLEAN"},
+                {"name": "col3", "type": "FLOAT"}
+            ]},
+        )
+    ]
+)
+def test_update_schema(schema_old, schema_new, expected_output):
+    output = pandas_gbq.schema.update_schema(schema_old, schema_new)
+    assert output == expected_output
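For context on the behaviour patch 1 introduces: ``update_schema`` starts from the schema inferred from the DataFrame dtypes and lets a user-supplied schema override or extend it field by field. A minimal sketch mirroring the parametrized test cases above (the column names are illustrative only, not part of the patch):

    from pandas_gbq.schema import update_schema

    inferred = {
        "fields": [
            {"name": "col1", "type": "INTEGER"},
            {"name": "col2", "type": "INTEGER"},
        ]
    }
    user_supplied = {"fields": [{"name": "col2", "type": "BOOLEAN"}]}

    # Fields with matching names are replaced; everything else is kept as-is.
    merged = update_schema(inferred, user_supplied)
    assert merged == {
        "fields": [
            {"name": "col1", "type": "INTEGER"},
            {"name": "col2", "type": "BOOLEAN"},
        ]
    }
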
From 1d98d2bd79c3fa62182f8b0e2449d21e7b1f198e Mon Sep 17 00:00:00 2001
From: John Paton
Date: Tue, 12 Mar 2019 18:01:28 +0100
Subject: [PATCH 2/6] CLN: applied black

---
 pandas_gbq/gbq.py         |  4 +++-
 pandas_gbq/schema.py      |  1 -
 tests/unit/test_schema.py | 47 +++++++++++++++++++++++----------------
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 1b803880..113bfda3 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -1027,7 +1027,9 @@ def to_gbq(
     if not table_schema:
         table_schema = default_schema
     else:
-        table_schema = _update_bq_schema(default_schema, dict(fields=table_schema))
+        table_schema = _update_bq_schema(
+            default_schema, dict(fields=table_schema)
+        )
 
     # If table exists, check if_exists parameter
     if table.exists(table_id):
diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py
index e4e18b02..e8246d17 100644
--- a/pandas_gbq/schema.py
+++ b/pandas_gbq/schema.py
@@ -60,4 +60,3 @@ def update_schema(schema_old, schema_new):
             output_fields.append(field)
 
     return {"fields": output_fields}
-
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index e2747580..af3b2043 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -55,16 +55,19 @@ def test_generate_bq_schema(dataframe, expected_schema):
     schema = pandas_gbq.schema.generate_bq_schema(dataframe)
 
     assert schema == expected_schema
 
+
 @pytest.mark.parametrize(
     "schema_old,schema_new,expected_output",
     [
         (
             {"fields": [{"name": "col1", "type": "INTEGER"}]},
             {"fields": [{"name": "col2", "type": "TIMESTAMP"}]},
-            {"fields": [
-                {"name": "col1", "type": "INTEGER"},
-                {"name": "col2", "type": "TIMESTAMP"}
-            ]},
+            {
+                "fields": [
+                    {"name": "col1", "type": "INTEGER"},
+                    {"name": "col2", "type": "TIMESTAMP"},
+                ]
+            },
         ),
         (
             {"fields": [{"name": "col1", "type": "INTEGER"}]},
@@ -72,21 +75,27 @@ def test_generate_bq_schema(dataframe, expected_schema):
             {"fields": [{"name": "col1", "type": "BOOLEAN"}]},
         ),
         (
-            {"fields": [
-                {"name": "col1", "type": "INTEGER"},
-                {"name": "col2", "type": "INTEGER"}
-            ]},
-            {"fields": [
-                {"name": "col2", "type": "BOOLEAN"},
-                {"name": "col3", "type": "FLOAT"}
-            ]},
-            {"fields": [
-                {"name": "col1", "type": "INTEGER"},
-                {"name": "col2", "type": "BOOLEAN"},
-                {"name": "col3", "type": "FLOAT"}
-            ]},
-        )
-    ]
+            {
+                "fields": [
+                    {"name": "col1", "type": "INTEGER"},
+                    {"name": "col2", "type": "INTEGER"},
+                ]
+            },
+            {
+                "fields": [
+                    {"name": "col2", "type": "BOOLEAN"},
+                    {"name": "col3", "type": "FLOAT"},
+                ]
+            },
+            {
+                "fields": [
+                    {"name": "col1", "type": "INTEGER"},
+                    {"name": "col2", "type": "BOOLEAN"},
+                    {"name": "col3", "type": "FLOAT"},
+                ]
+            },
+        ),
+    ],
 )
 def test_update_schema(schema_old, schema_new, expected_output):
     output = pandas_gbq.schema.update_schema(schema_old, schema_new)

From 34de9e5c540c4eb45cb933e75910fe7c07117b23 Mon Sep 17 00:00:00 2001
From: John Paton
Date: Tue, 12 Mar 2019 18:06:41 +0100
Subject: [PATCH 3/6] BUG: make update_schema python 2.7 compatible

---
 pandas_gbq/schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py
index e8246d17..c59ed68e 100644
--- a/pandas_gbq/schema.py
+++ b/pandas_gbq/schema.py
@@ -46,7 +46,7 @@ def update_schema(schema_old, schema_new):
     """
     old_fields = schema_old["fields"]
     new_fields = schema_new["fields"]
-    output_fields = old_fields.copy()
+    output_fields = list(old_fields)
 
     field_indices = {field["name"]: i for i, field in enumerate(output_fields)}
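The one-line fix in patch 3 swaps ``old_fields.copy()`` for ``list(old_fields)`` because ``list.copy()`` only exists on Python 3.3+, while the list constructor produces the same shallow copy on both Python 2.7 and 3. A small illustration (not part of the patch, field names made up):

    fields = [{"name": "col1", "type": "INTEGER"}]

    copied = list(fields)  # shallow copy, works on Python 2.7 and 3
    # fields.copy() would raise AttributeError on Python 2.7
    copied.append({"name": "col2", "type": "STRING"})
    assert len(fields) == 1  # the original list is left untouched
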
{field["name"]: i for i, field in enumerate(output_fields)} From ef46a83bfafb6c66ec5b7e43352993a24e566bac Mon Sep 17 00:00:00 2001 From: John Paton Date: Tue, 12 Mar 2019 18:21:59 +0100 Subject: [PATCH 4/6] DOC: update docs to allow for a subset of columns in to_gbq table_schema --- pandas_gbq/gbq.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 113bfda3..2fa31e4f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -939,9 +939,11 @@ def to_gbq( 'STRING'},...]``. If schema is not provided, it will be generated according to dtypes of DataFrame columns. - If schema is provided, it must contain all DataFrame columns. - pandas_gbq.gbq._generate_bq_schema() may be used to create an initial - schema, though it doesn't preserve column order. + If schema is provided, it may contain all or a subset of DataFrame + columns. If a subset is provided, the rest will be inferred from + the DataFrame dtypes. + pandas_gbq.gbq._generate_bq_schema() may be used to create an + initial schema, though it doesn't preserve column order. See BigQuery API documentation on available names of a field. .. versionadded:: 0.3.1 From 5a797a087bf96ad9d5893498359f14ddf4d239ec Mon Sep 17 00:00:00 2001 From: John Paton Date: Tue, 12 Mar 2019 18:43:10 +0100 Subject: [PATCH 5/6] DOC: what's new --- docs/source/changelog.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 3b43ccd3..c8c34ea7 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -20,6 +20,11 @@ Internal changes - Use ``to_dataframe()`` from ``google-cloud-bigquery`` in the ``read_gbq()`` function. (:issue:`247`) +Enhancements +~~~~~~~~~~~~ +- Allow ``table_schema`` in :func:`to_gbq` to contain only a subset of columns, + with the rest being populated using the DataFrame dtypes (:issue:`218`, + contributed by @johnpaton) .. _changelog-0.9.0: @@ -237,4 +242,4 @@ Initial release of transfered code from `pandas `__ -- :func:`read_gbq` now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss `pandas-GH#14064 `__, and `pandas-GH#14305 `__ \ No newline at end of file +- :func:`read_gbq` now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
From 6856d058e0fdb3b74b6d92908ea7b22bf3844ec9 Mon Sep 17 00:00:00 2001
From: John Paton
Date: Tue, 12 Mar 2019 18:52:52 +0100
Subject: [PATCH 6/6] DOC: close parens around issue in changelog

---
 docs/source/changelog.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index c8c34ea7..e3c0edd7 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -23,8 +23,8 @@ Internal changes
 Enhancements
 ~~~~~~~~~~~~
 - Allow ``table_schema`` in :func:`to_gbq` to contain only a subset of columns,
-  with the rest being populated using the DataFrame dtypes (:issue:`218`,
-  contributed by @johnpaton)
+  with the rest being populated using the DataFrame dtypes (:issue:`218`)
+  (contributed by @johnpaton)
 
 .. _changelog-0.9.0: