Skip to content

Commit 9deda4e

Browse files
committed
Merge pull request #79 from sshrestha-datalicious/add_udf_support_to_write_table
Add UDF support to write table
2 parents eecd8f4 + 970546f commit 9deda4e

File tree

3 files changed, with 50 additions and 1 deletion.

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,34 @@ try:
168168
except BigQueryTimeoutException:
169169
print "Timeout"
170170

171+
# write to permanent table with UDF in query string
172+
external_udf_uris = ["gs://bigquery-sandbox-udf/url_decode.js"]
173+
query = """SELECT requests, title
174+
FROM
175+
urlDecode(
176+
SELECT
177+
title, sum(requests) AS num_requests
178+
FROM
179+
[fh-bigquery:wikipedia.pagecounts_201504]
180+
WHERE language = 'fr'
181+
GROUP EACH BY title
182+
)
183+
WHERE title LIKE '%ç%'
184+
ORDER BY requests DESC
185+
LIMIT 100
186+
"""
187+
job = client.write_to_table(
188+
query,
189+
'dataset',
190+
'table',
191+
external_udf_uris=external_udf_uris
192+
)
193+
194+
try:
195+
job_resource = client.wait_for_job(job, timeout=60)
196+
print job_resource
197+
except BigQueryTimeoutException:
198+
print "Timeout"
171199

172200
# write to temporary table
173201
job = client.write_to_table('SELECT * FROM dataset.original_table LIMIT 100')
@@ -176,6 +204,8 @@ try:
176204
print job_resource
177205
except BigQueryTimeoutException:
178206
print "Timeout"
207+
208+
179209
```
180210

181211
# Import data from Google cloud storage

bigquery/client.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,7 @@ def write_to_table(
861861
query,
862862
dataset=None,
863863
table=None,
864+
external_udf_uris=[],
864865
allow_large_results=None,
865866
use_query_cache=None,
866867
priority=None,
@@ -874,6 +875,11 @@ def write_to_table(
874875
query: required BigQuery query string.
875876
dataset: optional string id of the dataset
876877
table: optional string id of the table
878+
external_udf_uris: optional list of external UDF URIs
879+
(if given,
880+
URIs must be Google Cloud Storage (gs://) paths
881+
and have .js extensions
882+
)
877883
allow_large_results: optional boolean
878884
use_query_cache: optional boolean
879885
priority: optional string
@@ -919,6 +925,14 @@ def write_to_table(
919925
if write_disposition:
920926
configuration['writeDisposition'] = write_disposition
921927

928+
configuration['userDefinedFunctionResources'] = []
929+
for external_udf_uri in external_udf_uris:
930+
configuration['userDefinedFunctionResources'].append(
931+
{
932+
"resourceUri": external_udf_uri
933+
}
934+
)
935+
922936
body = {
923937
"configuration": {
924938
'query': configuration
@@ -1230,7 +1244,7 @@ def _transform_row(self, row, schema):
12301244

12311245
elif col_dict['type'] == 'BOOLEAN':
12321246
row_value = row_value in ('True', 'true', 'TRUE')
1233-
1247+
12341248
elif col_dict['type'] == 'TIMESTAMP':
12351249
row_value = float(row_value)
12361250

bigquery/tests/test_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,7 @@ def setUp(self):
10121012
self.project_id = 'project'
10131013
self.dataset_id = 'dataset'
10141014
self.table_id = 'table'
1015+
self.external_udf_uris = ['gs://bucket/external_udf.js']
10151016
self.use_query_cache = False
10161017
self.priority = "INTERACTIVE"
10171018
self.client = client.BigQueryClient(self.mock_api,
@@ -1032,6 +1033,9 @@ def test_write(self):
10321033
"tableId": self.table_id
10331034
},
10341035
"query": self.query,
1036+
"userDefinedFunctionResources": [{
1037+
"resourceUri": self.external_udf_uris[0]
1038+
}],
10351039
"useQueryCache": self.use_query_cache,
10361040
"priority": self.priority,
10371041
}
@@ -1042,6 +1046,7 @@ def test_write(self):
10421046
result = self.client.write_to_table(self.query,
10431047
self.dataset_id,
10441048
self.table_id,
1049+
external_udf_uris=self.external_udf_uris,
10451050
use_query_cache=False,
10461051
priority=self.priority)
10471052

0 commit comments

Comments
 (0)