Skip to content

Commit b1b1479

Browse files
committed
DOC: add detailed docs for read_gbq, to_gbq back.
1 parent ea52387 commit b1b1479

File tree

2 files changed

+206
-10
lines changed

2 files changed

+206
-10
lines changed

pandas/core/frame.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,17 +1116,84 @@ def to_dict(self, orient='dict', into=dict):
11161116
else:
11171117
raise ValueError("orient '%s' not understood" % orient)
11181118

1119-
def to_gbq(self, *args, **kwargs):
1119+
def to_gbq(
1120+
self, destination_table, project_id, chunksize=10000,
1121+
verbose=True, reauth=False, if_exists='fail', private_key=None,
1122+
**kwargs):
11201123
"""
11211124
Write a DataFrame to a Google BigQuery table.
11221125
11231126
This function requires the `pandas-gbq package
11241127
<https://pandas-gbq.readthedocs.io>`__.
11251128
1126-
See: :meth:`pandas_gbq.to_gbq`
1129+
Authentication to the Google BigQuery service is via OAuth 2.0.
1130+
1131+
- If "private_key" is not provided:
1132+
1133+
By default "application default credentials" are used.
1134+
1135+
If default application credentials are not found or are restrictive,
1136+
user account credentials are used. In this case, you will be asked to
1137+
grant permissions for product name 'pandas GBQ'.
1138+
1139+
- If "private_key" is provided:
1140+
1141+
Service account credentials will be used to authenticate.
1142+
1143+
Parameters
1144+
----------
1145+
destination_table : string
1146+
Name of table to be written, in the form 'dataset.tablename'.
1147+
project_id : str
1148+
Google BigQuery Account project ID.
1149+
chunksize : int (default 10000)
1150+
Number of rows to be inserted in each chunk from the dataframe.
1151+
Set to ``None`` to load the whole dataframe at once.
1152+
verbose : boolean (default True)
1153+
Show percentage complete.
1154+
reauth : boolean (default False)
1155+
Force Google BigQuery to reauthenticate the user. This is useful
1156+
if multiple accounts are used.
1157+
if_exists : {'fail', 'replace', 'append'}, default 'fail'
1158+
Behavior when the destination table exists.
1159+
'fail': If table exists, do nothing.
1160+
'replace': If table exists, drop it, recreate it, and insert data.
1161+
'append': If table exists, insert data. Create if does not exist.
1162+
private_key : str (optional)
1163+
Service account private key in JSON format. Can be file path
1164+
or string contents. This is useful for remote server
1165+
authentication (eg. Jupyter/IPython notebook on remote host).
1166+
kwargs : dict
1167+
Arbitrary keyword arguments.
1168+
1169+
auth_local_webserver (boolean): default False
1170+
Use the [local webserver flow] instead of the [console flow] when
1171+
getting user credentials.
1172+
1173+
.. [local webserver flow]
1174+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
1175+
.. [console flow]
1176+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
1177+
.. versionadded:: pandas-gbq 0.2.0
1178+
table_schema (list of dicts):
1179+
List of BigQuery table fields to which according DataFrame columns
1180+
conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If
1181+
schema is not provided, it will be generated according to dtypes
1182+
of DataFrame columns. See BigQuery API documentation on available
1183+
names of a field.
1184+
.. versionadded:: pandas-gbq 0.3.1
1185+
1186+
See Also
1187+
--------
1188+
pandas_gbq.to_gbq
1189+
pandas.io.to_gbq
11271190
"""
11281191
from pandas.io import gbq
1129-
return gbq.to_gbq(self, *args, **kwargs)
1192+
return gbq.to_gbq(
1193+
self, destination_table, project_id, chunksize=chunksize,
1194+
verbose=verbose, reauth=reauth, if_exists=if_exists,
1195+
private_key=private_key, **kwargs)
1196+
11301197

11311198
@classmethod
11321199
def from_records(cls, data, index=None, exclude=None, columns=None,

pandas/io/gbq.py

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,32 +21,161 @@ def _try_import():
2121
return pandas_gbq
2222

2323

24-
def read_gbq(
        query, project_id=None, index_col=None, col_order=None, reauth=False,
        verbose=True, private_key=None, dialect='legacy', **kwargs):
    """
    Run a query against Google BigQuery and return the result as a DataFrame.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    Authentication to the Google BigQuery service is via OAuth 2.0.

    - If "private_key" is not provided:

      By default "application default credentials" are used.

      If default application credentials are not found or are restrictive,
      user account credentials are used. In this case, you will be asked to
      grant permissions for product name 'pandas GBQ'.

    - If "private_key" is provided:

      Service account credentials will be used to authenticate.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str
        Google BigQuery Account project ID.
    index_col : str (optional)
        Name of result column to use for index in results DataFrame.
    col_order : list(str) (optional)
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : boolean (default False)
        Force Google BigQuery to reauthenticate the user. This is useful
        if multiple accounts are used.
    verbose : boolean (default True)
        Verbose output.
    private_key : str (optional)
        Service account private key in JSON format. Can be file path
        or string contents. This is useful for remote server
        authentication (eg. Jupyter/IPython notebook on remote host).
    dialect : {'legacy', 'standard'}, default 'legacy'
        SQL syntax dialect to use.
        'legacy' : Use BigQuery's legacy SQL dialect.
        'standard' : Use BigQuery's standard SQL, which is
        compliant with the SQL 2011 standard. For more information
        see `BigQuery SQL Reference
        <https://cloud.google.com/bigquery/sql-reference/>`__.
    kwargs : dict
        Arbitrary keyword arguments.
        configuration (dict): query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery SQL Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq
    """
    pandas_gbq = _try_import()
    # Gather the explicit options into one mapping and unpack it next to the
    # caller's extra keywords; a keyword duplicated in ``kwargs`` still
    # raises TypeError, exactly as an explicit-keyword call would.
    options = {
        'project_id': project_id,
        'index_col': index_col,
        'col_order': col_order,
        'reauth': reauth,
        'verbose': verbose,
        'private_key': private_key,
        'dialect': dialect,
    }
    return pandas_gbq.read_gbq(query, **options, **kwargs)
40101

41102

42-
def to_gbq(
        dataframe, destination_table, project_id, chunksize=10000,
        verbose=True, reauth=False, if_exists='fail', private_key=None,
        **kwargs):
    """
    Write a DataFrame to a Google BigQuery table.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    Authentication to the Google BigQuery service is via OAuth 2.0.

    - If "private_key" is not provided:

      By default "application default credentials" are used.

      If default application credentials are not found or are restrictive,
      user account credentials are used. In this case, you will be asked to
      grant permissions for product name 'pandas GBQ'.

    - If "private_key" is provided:

      Service account credentials will be used to authenticate.

    Parameters
    ----------
    dataframe : DataFrame
        DataFrame to be written.
    destination_table : str
        Name of table to be written, in the form 'dataset.tablename'.
    project_id : str
        Google BigQuery Account project ID.
    chunksize : int (default 10000)
        Number of rows to be inserted in each chunk from the dataframe.
        Set to ``None`` to load the whole dataframe at once.
    verbose : boolean (default True)
        Show percentage complete.
    reauth : boolean (default False)
        Force Google BigQuery to reauthenticate the user. This is useful
        if multiple accounts are used.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        Behavior when the destination table exists.
        'fail': If table exists, do nothing.
        'replace': If table exists, drop it, recreate it, and insert data.
        'append': If table exists, insert data. Create if does not exist.
    private_key : str (optional)
        Service account private key in JSON format. Can be file path
        or string contents. This is useful for remote server
        authentication (eg. Jupyter/IPython notebook on remote host).
    kwargs : dict
        Arbitrary keyword arguments, forwarded to :func:`pandas_gbq.to_gbq`.

        auth_local_webserver (boolean): default False
            Use the `local webserver flow`_ instead of the
            `console flow`_ when getting user credentials.

            .. _local webserver flow:
                http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
            .. _console flow:
                http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

            .. versionadded:: pandas-gbq 0.2.0
        table_schema (list of dicts):
            List of BigQuery table fields to which according DataFrame
            columns conform to, e.g. `[{'name': 'col1', 'type':
            'STRING'},...]`. If schema is not provided, it will be
            generated according to dtypes of DataFrame columns. See
            BigQuery API documentation on available names of a field.

            .. versionadded:: pandas-gbq 0.3.1

    See Also
    --------
    pandas_gbq.to_gbq
    pandas.DataFrame.to_gbq
    """
    pandas_gbq = _try_import()
    # Forward pandas_gbq's return value: DataFrame.to_gbq does
    # `return gbq.to_gbq(...)`, which was always None while this wrapper
    # dropped the result.
    return pandas_gbq.to_gbq(
        dataframe, destination_table, project_id, chunksize=chunksize,
        verbose=verbose, reauth=reauth, if_exists=if_exists,
        private_key=private_key, **kwargs)

0 commit comments

Comments
 (0)