Skip to content

Commit dd79b4f

Browse files
authored
BUG: avoid load jobs for empty dataframes (#255)
* BUG: avoid load jobs for empty dataframes

  There is no reason to run a load job if there is no data to load. This avoids an "Empty schema specified for the load job." error when the DataFrame also contains no columns.

* Blacken
* Remove unused test_size variable.
1 parent 21f49da commit dd79b4f

File tree

2 files changed

+33
-8
lines changed

2 files changed

+33
-8
lines changed

pandas_gbq/gbq.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,6 +1063,11 @@ def to_gbq(
10631063
else:
10641064
table.create(table_id, table_schema)
10651065

1066+
if dataframe.empty:
1067+
# Create the table (if needed), but don't try to run a load job with an
1068+
# empty file. See: https://github.com/pydata/pandas-gbq/issues/237
1069+
return
1070+
10661071
connector.load_data(
10671072
dataframe,
10681073
dataset_id,

tests/system/test_gbq.py

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -892,6 +892,8 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, private_key_path):
892892
class TestToGBQIntegration(object):
893893
@pytest.fixture(autouse=True, scope="function")
894894
def setup(self, project, credentials, random_dataset_id):
895+
from google.cloud import bigquery
896+
895897
# - PER-TEST FIXTURES -
896898
# put here any instruction you want to be run *BEFORE* *EVERY* test is
897899
# executed.
@@ -900,6 +902,9 @@ def setup(self, project, credentials, random_dataset_id):
900902
)
901903
self.destination_table = "{}.{}".format(random_dataset_id, TABLE_ID)
902904
self.credentials = credentials
905+
self.bqclient = bigquery.Client(
906+
project=project, credentials=credentials
907+
)
903908

904909
def test_upload_data(self, project_id):
905910
test_id = "1"
@@ -926,7 +931,6 @@ def test_upload_data(self, project_id):
926931

927932
def test_upload_empty_data(self, project_id):
928933
test_id = "data_with_0_rows"
929-
test_size = 0
930934
df = DataFrame()
931935

932936
gbq.to_gbq(
@@ -936,15 +940,31 @@ def test_upload_empty_data(self, project_id):
936940
credentials=self.credentials,
937941
)
938942

939-
result = gbq.read_gbq(
940-
"SELECT COUNT(*) AS num_rows FROM {0}".format(
941-
self.destination_table + test_id
942-
),
943-
project_id=project_id,
943+
table = self.bqclient.get_table(self.destination_table + test_id)
944+
assert table.num_rows == 0
945+
assert len(table.schema) == 0
946+
947+
def test_upload_empty_data_with_schema(self, project_id):
948+
test_id = "data_with_0_rows"
949+
df = DataFrame(
950+
{
951+
"a": pandas.Series(dtype="int64"),
952+
"b": pandas.Series(dtype="object"),
953+
}
954+
)
955+
956+
gbq.to_gbq(
957+
df,
958+
self.destination_table + test_id,
959+
project_id,
944960
credentials=self.credentials,
945-
dialect="legacy",
946961
)
947-
assert result["num_rows"][0] == test_size
962+
963+
table = self.bqclient.get_table(self.destination_table + test_id)
964+
assert table.num_rows == 0
965+
schema = table.schema
966+
assert schema[0].field_type == "INTEGER"
967+
assert schema[1].field_type == "STRING"
948968

949969
def test_upload_data_if_table_exists_fail(self, project_id):
950970
test_id = "2"

0 commit comments

Comments
 (0)