diff --git a/README.md b/README.md index cdec34f..6b4606c 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Simple Python client for interacting with Google BigQuery. This client provides an API for retrieving and inserting BigQuery data by wrapping Google's low-level API client library. It also provides facilities that make it convenient to access data that is tied to an App Engine appspot, such as request logs. +[Documentation](http://tylertreat.github.io/BigQuery-Python/) + # Installation `pip install bigquery-python` @@ -29,7 +31,7 @@ client = get_client(project_id, service_account=service_account, # JSON key provided by Google json_key = 'key.json' -client = get_client(project_id, json_key_file=json_key, readonly=True) +client = get_client(json_key_file=json_key, readonly=True) # Submit an async query. job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000') @@ -99,13 +101,32 @@ conditions = [ } ] +grouping = ['Timestamp'] + +having = [ + { + 'field': 'Timestamp', + 'type': 'INTEGER', + 'comparators': [ + { + 'condition': '==', + 'negate': False, + 'value': 1399478981 + } + ] + } +] + +order_by ={'fields': ['Timestamp'], 'direction': 'desc'} + query = render_query( 'dataset', ['table'], select=selects, conditions=conditions, - groupings=['Timestamp'], - order_by={'field': 'Timestamp', 'direction': 'desc'} + groupings=grouping, + having=having, + order_by=order_by ) job_id, _ = client.query(query) @@ -168,6 +189,34 @@ try: except BigQueryTimeoutException: print "Timeout" +# write to permanent table with UDF in query string +external_udf_uris = ["gs://bigquery-sandbox-udf/url_decode.js"] +query = """SELECT requests, title + FROM + urlDecode( + SELECT + title, sum(requests) AS num_requests + FROM + [fh-bigquery:wikipedia.pagecounts_201504] + WHERE language = 'fr' + GROUP EACH BY title + ) + WHERE title LIKE '%รง%' + ORDER BY requests DESC + LIMIT 100 + """ +job = client.write_to_table( + query, + 'dataset', + 'table', + external_udf_uris=external_udf_uris +) + +try: + job_resource = client.wait_for_job(job, timeout=60) + print job_resource +except BigQueryTimeoutException: + print "Timeout" # write to temporary table job = client.write_to_table('SELECT * FROM dataset.original_table LIMIT 100') @@ -176,6 +225,8 @@ try: print job_resource except BigQueryTimeoutException: print "Timeout" + + ``` # Import data from Google cloud storage diff --git a/bigquery/__init__.py b/bigquery/__init__.py index ef22544..b393875 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,4 +1,7 @@ from __future__ import absolute_import + +from .version import __version__ + from .client import get_client from .client import ( BIGQUERY_SCOPE, diff --git a/bigquery/client.py b/bigquery/client.py index 860b887..ea5c503 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1,22 +1,26 @@ import calendar import json -import logging +from logging import getLogger from collections import defaultdict from datetime import datetime, timedelta from hashlib import sha256 +from io import StringIO from time import sleep, time -import httplib2 import six -from apiclient.discovery import build -from apiclient.errors import HttpError - from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, JobInsertException, UnfinishedQueryException) -from bigquery.schema_builder import schema_from_record +from googleapiclient.discovery import build, DISCOVERY_URI +from googleapiclient.errors import HttpError +from httplib2 import Http + +BIGQUERY_SCOPE = [ + 
'https://www.googleapis.com/auth/bigquery' +] -BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery' -BIGQUERY_SCOPE_READ_ONLY = 'https://www.googleapis.com/auth/bigquery.readonly' +BIGQUERY_SCOPE_READ_ONLY = [ + 'https://www.googleapis.com/auth/bigquery.readonly' +] CACHE_TIMEOUT = timedelta(seconds=30) @@ -42,8 +46,11 @@ JOB_FORMAT_NEWLINE_DELIMITED_JSON JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV +logger = getLogger(__name__) -def get_client(project_id, credentials=None, service_account=None, + +def get_client(project_id=None, credentials=None, + service_url=None, service_account=None, private_key=None, private_key_file=None, json_key=None, json_key_file=None, readonly=True, swallow_results=True): @@ -51,75 +58,113 @@ def get_client(project_id, credentials=None, service_account=None, AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. - Args: - project_id: the BigQuery project id. - credentials: an AssertionCredentials instance to authenticate requests - to BigQuery. - service_account: the Google API service account name. - private_key: the private key associated with the service account in - PKCS12 or PEM format. - private_key_file: the name of the file containing the private key - associated with the service account in PKCS12 or PEM - format. - json_key: the JSON key associated with the service account - json_key_file: the name of the JSON key file associated with - the service account - readonly: bool indicating if BigQuery access is read-only. Has no - effect if credentials are provided. - swallow_results: If set to false then return the actual response value - instead of converting to a boolean. - - Returns: - an instance of BigQueryClient. + Parameters + ---------- + project_id : str, optional + The BigQuery project id, required unless json_key or json_key_file is + provided. + credentials : oauth2client.client.SignedJwtAssertionCredentials, optional + AssertionCredentials instance to authenticate requests to BigQuery + (optional, must provide `service_account` and (`private_key` or + `private_key_file`) or (`json_key` or `json_key_file`) if not included + service_url : str, optional + A URI string template pointing to the location of Google's API + discovery service. Requires two parameters {api} and {apiVersion} that + when filled in produce an absolute URI to the discovery document for + that service. If not set then the default googleapiclient discovery URI + is used. See `credentials` + service_account : str, optional + The Google API service account name. See `credentials` + private_key : str, optional + The private key associated with the service account in PKCS12 or PEM + format. See `credentials` + private_key_file : str, optional + The name of the file containing the private key associated with the + service account in PKCS12 or PEM format. See `credentials` + json_key : dict, optional + The JSON key associated with the service account. See `credentials` + json_key_file : str, optional + The name of the JSON key file associated with the service account. See + `credentials`. + readonly : bool + Bool indicating if BigQuery access is read-only. Has no effect if + credentials are provided. Default True. + swallow_results : bool + If set to False, then return the actual response value instead of + converting to boolean. Default True. + + Returns + ------- + BigQueryClient + An instance of the BigQuery client. 
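For reference, a short usage sketch of the two credential paths documented above; the key file names, project id, and service account address are placeholders rather than part of this patch:

```python
from bigquery import get_client

# JSON key: with this change the project id may be omitted, since it is
# read from the key file's 'project_id' field.
client = get_client(json_key_file='key.json', readonly=True)

# P12/PEM key: the project id and service account e-mail are still required.
client = get_client(
    project_id='my-project',
    service_account='account-1@my-project.iam.gserviceaccount.com',
    private_key_file='key.p12',
    readonly=True,
)
```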
""" if not credentials: - assert (service_account and (private_key or private_key_file)) or (json_key or json_key_file), \ - 'Must provide AssertionCredentials or service account and P12 key or JSON key' + assert (service_account and (private_key or private_key_file)) or ( + json_key or json_key_file), \ + 'Must provide AssertionCredentials or service account and P12 key\ + or JSON key' + + if not project_id: + assert json_key or json_key_file, \ + 'Must provide project_id unless json_key or json_key_file is\ + provided' + + if service_url is None: + service_url = DISCOVERY_URI + + scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE if private_key_file: - with open(private_key_file, 'rb') as key_file: - private_key = key_file.read() + credentials = _credentials().from_p12_keyfile(service_account, + private_key_file, + scopes=scope) + + if private_key: + try: + if isinstance(private_key, basestring): + private_key = private_key.decode('utf-8') + except NameError: + # python3 -- private_key is already unicode + pass + credentials = _credentials().from_p12_keyfile_buffer( + service_account, + StringIO(private_key), + scopes=scope) if json_key_file: with open(json_key_file, 'r') as key_file: json_key = json.load(key_file) if json_key: - service_account = json_key['client_email'] - private_key = json_key['private_key'] + credentials = _credentials().from_json_keyfile_dict(json_key, + scopes=scope) + if not project_id: + project_id = json_key['project_id'] bq_service = _get_bq_service(credentials=credentials, - service_account=service_account, - private_key=private_key, - readonly=readonly) + service_url=service_url) return BigQueryClient(bq_service, project_id, swallow_results) -def _get_bq_service(credentials=None, service_account=None, private_key=None, - readonly=True): +def _get_bq_service(credentials=None, service_url=None): """Construct an authorized BigQuery service object.""" - assert credentials or (service_account and private_key), \ - 'Must provide AssertionCredentials or service account and key' - - if not credentials: - scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE - credentials = _credentials()(service_account, private_key, scope=scope) + assert credentials, 'Must provide ServiceAccountCredentials' - http = httplib2.Http() - http = credentials.authorize(http) - service = build('bigquery', 'v2', http=http) + http = credentials.authorize(Http()) + service = build('bigquery', 'v2', http=http, + discoveryServiceUrl=service_url) return service def _credentials(): """Import and return SignedJwtAssertionCredentials class""" - from oauth2client.client import SignedJwtAssertionCredentials + from oauth2client.service_account import ServiceAccountCredentials - return SignedJwtAssertionCredentials + return ServiceAccountCredentials class BigQueryClient(object): @@ -140,22 +185,26 @@ def _submit_query_job(self, query_data): For fine-grained control over a query job, see: https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query + Parameters + ---------- + query_data + query object as per "configuration.query" in + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query - - Args: - query_data: query object as per "configuration.query" in - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query - - Returns: + Returns + ------- + tuple job id and query results if query completed. 
If dry_run is True, job id will be None and results will be empty if the query is valid or a dict containing the response if invalid. - Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException + On timeout """ - logging.debug('Submitting query job: %s' % query_data) + logger.debug('Submitting query job: %s' % query_data) job_collection = self.bigquery.jobs() @@ -175,7 +224,7 @@ def _submit_query_job(self, query_data): # raise exceptions if it's not an async query # and job is not completed after timeout if not job_complete and query_data.get("timeoutMs", False): - logging.error('BigQuery job %s timeout' % job_id) + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_id, [self._transform_row(row, schema) for row in rows] @@ -191,18 +240,20 @@ def _insert_job(self, body_object): For more details, see: https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert + Parameters + ---------- + body_object : body object passed to bigquery.jobs().insert() - Args: - body_object: body object passed to bigquery.jobs().insert() - - Returns: - response of the bigquery.jobs().insert().execute() call + Returns + ------- + response of the bigquery.jobs().insert().execute() call - Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException on timeout """ - logging.debug('Submitting job: %s' % body_object) + logger.debug('Submitting job: %s' % body_object) job_collection = self.bigquery.jobs() @@ -214,25 +265,34 @@ def _insert_job(self, body_object): def query(self, query, max_results=None, timeout=0, dry_run=False): """Submit a query to BigQuery. - Args: - query: BigQuery query string. - max_results: maximum number of rows to return per page of results. - timeout: how long to wait for the query to complete, in seconds, - before the request times out and returns. - dry_run: if True, the query isn't actually run. A valid query will - return an empty response, while an invalid one will return - the same error message it would if it wasn't a dry run. - - Returns: - job id and query results if query completed. If dry_run is True, + Parameters + ---------- + query : str + BigQuery query string + max_results : int, optional + The maximum number of rows to return per page of results. + timeout : float, optional + How long to wait for the query to complete, in seconds before + the request times out and returns. + dry_run : bool, optional + If True, the query isn't actually run. A valid query will return an + empty response, while an invalid one will return the same error + message it would if it wasn't a dry run. + + Returns + ------- + tuple + (job id, query results) if the query completed. If dry_run is True, job id will be None and results will be empty if the query is valid - or a dict containing the response if invalid. + or a ``dict`` containing the response if invalid. - Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException + on timeout """ - logging.debug('Executing query: %s' % query) + logger.debug('Executing query: %s' % query) query_data = { 'query': query, @@ -245,16 +305,21 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): def get_query_schema(self, job_id): """Retrieve the schema of a query by job id. - Args: - job_id: The job_id that references a BigQuery query. - Returns: - A list of dictionaries that represent the schema. 
+ Parameters + ---------- + job_id : str + The job_id that references a BigQuery query + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the schema. """ query_reply = self.get_query_results(job_id, offset=0, limit=0) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() return query_reply['schema']['fields'] @@ -262,13 +327,18 @@ def get_query_schema(self, job_id): def get_table_schema(self, dataset, table): """Return the table schema. - Args: - dataset: the dataset containing the table. - table: the table to get the schema for. - - Returns: - A list of dicts that represent the table schema. If the table - doesn't exist, None is returned. + Parameters + ---------- + dataset : str + The dataset containing the `table`. + table : str + The table to get the schema for + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the table schema. If + the table doesn't exist, None is returned. """ try: @@ -278,7 +348,7 @@ def get_table_schema(self, dataset, table): datasetId=dataset).execute() except HttpError as e: if int(e.resp['status']) == 404: - logging.warn('Table %s.%s does not exist', dataset, table) + logger.warn('Table %s.%s does not exist', dataset, table) return None raise @@ -287,12 +357,17 @@ def get_table_schema(self, dataset, table): def check_job(self, job_id): """Return the state and number of results of a query by job id. - Args: - job_id: The job id of the query to check. - - Returns: - Whether or not the query has completed and the total number of rows - included in the query table if it has completed. + Parameters + ---------- + job_id : str + The job id of the query to check. + + Returns + ------- + tuple + (``bool``, ``int``) Whether or not the query has completed and the + total number of rows included in the query table if it has + completed (else 0) """ query_reply = self.get_query_results(job_id, offset=0, limit=0) @@ -302,23 +377,32 @@ def check_job(self, job_id): def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """Retrieve a list of rows from a query table by job id. - This method will append results from multiple pages together. If you want - to manually page through results, you can use `get_query_results` + This method will append results from multiple pages together. If you + want to manually page through results, you can use `get_query_results` method directly. - Args: - job_id: The job id that references a BigQuery query. - offset: The offset of the rows to pull from BigQuery. - limit: The number of rows to retrieve from a query table. - timeout: Timeout in seconds. - Returns: - A list of dictionaries that represent table rows. + Parameters + ---------- + job_id : str + The job id that references a BigQuery query. + offset : int, optional + The offset of the rows to pull from BigQuery + limit : int, optional + The number of rows to retrieve from a query table. + timeout : float, optional + Timeout in seconds. + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent table rows. 
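To make the paging behaviour described above concrete, a minimal sketch assuming `client` was built with `get_client` and that the dataset and table names exist:

```python
# Submit an asynchronous query (names are illustrative).
job_id, _ = client.query('SELECT word, corpus FROM dataset.my_table')

# Once the job reports complete, collect rows. With this change, paging
# stops as soon as `limit` rows have been gathered and the result is
# truncated to at most `limit` records.
complete, total_rows = client.check_job(job_id)
if complete:
    rows = client.get_query_rows(job_id, limit=500)
```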
""" # Get query results - query_reply = self.get_query_results(job_id, offset=offset, limit=limit, timeout=timeout) + query_reply = self.get_query_results(job_id, offset=offset, + limit=limit, timeout=timeout) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() schema = query_reply["schema"]["fields"] @@ -327,32 +411,43 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records = [self._transform_row(row, schema) for row in rows] # Append to records if there are multiple pages for query results - while page_token: - query_reply = self.get_query_results(job_id, offset=offset, limit=limit, - page_token=page_token, timeout=timeout) + while page_token and (not limit or len(records) < limit): + query_reply = self.get_query_results( + job_id, offset=offset, limit=limit, page_token=page_token, + timeout=timeout) page_token = query_reply.get("pageToken") rows = query_reply.get('rows', []) records += [self._transform_row(row, schema) for row in rows] - return records + return records[:limit] if limit else records def check_dataset(self, dataset_id): """Check to see if a dataset exists. - Args: - dataset: dataset unique id - Returns: - bool indicating if the table exists. + + Parameters + ---------- + dataset_id : str + Dataset unique id + + Returns + ------- + bool + True if dataset at `dataset_id` exists, else Fasle """ dataset = self.get_dataset(dataset_id) return bool(dataset) def get_dataset(self, dataset_id): - """ - Retrieve a dataset if it exists, otherwise return an empty dict. - Args: - dataset: dataset unique id - Returns: - dictionary containing the dataset object if it exists, otherwise - an empty dictionary + """Retrieve a dataset if it exists, otherwise return an empty dict. + + Parameters + ---------- + dataset_id : str + Dataset unique id + + Returns + ------- + dict + Contains dataset object if it exists, else empty """ try: dataset = self.bigquery.datasets().get( @@ -365,27 +460,35 @@ def get_dataset(self, dataset_id): def check_table(self, dataset, table): """Check to see if a table exists. - Args: - dataset: the dataset to check. - table: the name of the table. - - Returns: - bool indicating if the table exists. + Parameters + ---------- + dataset : str + The dataset to check + table : str + The name of the table + + Returns + ------- + bool + True if table exists, else False """ table = self.get_table(dataset, table) return bool(table) def get_table(self, dataset, table): - """ - Retrieve a table if it exists, otherwise return an empty dict. - - Args: - dataset: the dataset that the table is in - table: the name of the table - - Returns: - dictionary containing the table object if it exists, otherwise - an empty dictionary + """ Retrieve a table if it exists, otherwise return an empty dict. + + Parameters + ---------- + dataset : str + The dataset that the table is in + table : str + The name of the table + + Returns + ------- + dict + Containing the table object if it exists, else empty """ try: table = self.bigquery.tables().get( @@ -399,15 +502,22 @@ def get_table(self, dataset, table): def create_table(self, dataset, table, schema, expiration_time=None): """Create a new table in the dataset. - Args: - dataset: the dataset to create the table in. - table: the name of table to create. - schema: table schema dict. - expiration_time: the expiry time in milliseconds since the epoch. 
- - Returns: - bool indicating if the table was successfully created or not, - or response from BigQuery if swallow_results is set for False. + Parameters + ---------- + dataset : str + The dataset to create the table in + table : str + The name of the table to create + schema : dict + The table schema + expiration_time : float, optional + The expiry time in milliseconds since the epoch. + + Returns + ------- + Union[bool, dict] + If the table was successfully created, or response from BigQuery + if swallow_results is set to False """ body = { @@ -434,9 +544,102 @@ def create_table(self, dataset, table, schema, expiration_time=None): return table except HttpError as e: - logging.error(('Cannot create table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + logger.error(('Cannot create table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, e.content)) + if self.swallow_results: + return False + else: + return {} + + def update_table(self, dataset, table, schema): + """Update an existing table in the dataset. + + Parameters + ---------- + dataset : str + The dataset to update the table in + table : str + The name of the table to update + schema : dict + Table schema + + Returns + ------- + Union[bool, dict] + bool indicating if the table was successfully updated or not, + or response from BigQuery if swallow_results is set to False. + """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + try: + result = self.bigquery.tables().update( + projectId=self.project_id, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logger.error(('Cannot update table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, e.content)) + if self.swallow_results: + return False + else: + return {} + + def patch_table(self, dataset, table, schema): + """Patch an existing table in the dataset. + + Parameters + ---------- + dataset : str + The dataset to patch the table in + table : str + The name of the table to patch + schema : dict + The table schema + + Returns + ------- + Union[bool, dict] + Bool indicating if the table was successfully patched or not, + or response from BigQuery if swallow_results is set to False + """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + try: + result = self.bigquery.tables().patch( + projectId=self.project_id, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logger.error(('Cannot patch table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -445,14 +648,20 @@ def create_table(self, dataset, table, schema, expiration_time=None): def create_view(self, dataset, view, query): """Create a new view in the dataset. - Args: - dataset: the dataset to create the view in. - view: the name of view to create. - query: a query that BigQuery executes when the view is referenced. - - Returns: + Parameters + ---------- + dataset : str + The dataset to create the view in + view : str + The name of the view to create + query : dict + A query that BigQuery executes when the view is referenced. 
+ + Returns + ------- + Union[bool, dict] bool indicating if the view was successfully created or not, - or response from BigQuery if swallow_results is set for False. + or response from BigQuery if swallow_results is set to False. """ body = { @@ -478,9 +687,8 @@ def create_view(self, dataset, view, query): return view except HttpError as e: - logging.error(('Cannot create view {0}.{1}\n' - 'Http Error: {2}').format(dataset, view, - e.content)) + logger.error(('Cannot create view {0}.{1}\n' + 'Http Error: {2}').format(dataset, view, e.content)) if self.swallow_results: return False else: @@ -489,11 +697,16 @@ def create_view(self, dataset, view, query): def delete_table(self, dataset, table): """Delete a table from the dataset. - Args: - dataset: the dataset to delete the table from. - table: the name of the table to delete. + Parameters + ---------- + dataset : str + The dataset to delete the table from. + table : str + The name of the table to delete - Returns: + Returns + ------- + Union[bool, dict] bool indicating if the table was successfully deleted or not, or response from BigQuery if swallow_results is set for False. """ @@ -510,9 +723,8 @@ def delete_table(self, dataset, table): return response except HttpError as e: - logging.error(('Cannot delete table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + logger.error(('Cannot delete table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -522,16 +734,21 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): """Retrieve a list of tables that are related to the given app id and are inside the range of start and end times. - Args: - dataset_id: The BigQuery dataset id to consider. - app_id: The appspot name - start_time: The datetime or unix time after which records will be - fetched. - end_time: The datetime or unix time up to which records will be - fetched. - - Returns: - A list of table names. + Parameters + ---------- + dataset_id : str + The BigQuery dataset id to consider. + app_id : str + The appspot name + start_time : Union[datetime, int] + The datetime or unix time after which records will be fetched. + end_time : Union[datetime, int] + The datetime or unix time up to which records will be fetched. + + Returns + ------- + list + A ``list`` of table names. """ if isinstance(start_time, datetime): @@ -565,40 +782,57 @@ def import_data_from_uris( skip_leading_rows=None, ): """ - Imports data into a BigQuery table from cloud storage. 
- Args: - source_uris: required string or list of strings representing - the uris on cloud storage of the form: - gs://bucket/filename - dataset: required string id of the dataset - table: required string id of the table - job: optional string identifying the job (a unique jobid - is automatically generated if not provided) - schema: optional list representing the bigquery schema - source_format: optional string - (one of the JOB_SOURCE_FORMAT_* constants) - create_disposition: optional string - (one of the JOB_CREATE_* constants) - write_disposition: optional string - (one of the JOB_WRITE_* constants) - encoding: optional string default - (one of the JOB_ENCODING_* constants) - ignore_unknown_values: optional boolean - max_bad_records: optional boolean - allow_jagged_rows: optional boolean for csv only - allow_quoted_newlines: optional boolean for csv only - field_delimiter: optional string for csv only - quote: optional string the quote character for csv only - skip_leading_rows: optional int for csv only - - Optional arguments with value None are determined by - BigQuery as described: + Imports data into a BigQuery table from cloud storage. Optional + arguments that are not specified are determined by BigQuery as + described: https://developers.google.com/bigquery/docs/reference/v2/jobs - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Parameters + ---------- + source_urls : list + A ``list`` of ``str`` objects representing the urls on cloud + storage of the form: gs://bucket/filename + dataset : str + String id of the dataset + table : str + String id of the table + job : str, optional + Identifies the job (a unique job id is automatically generated if + not provided) + schema : list, optional + Represents the BigQuery schema + source_format : str, optional + One of the JOB_SOURCE_FORMAT_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + encoding : str, optional + One of the JOB_ENCODING_* constants + ignore_unknown_values : bool, optional + Whether or not to ignore unknown values + max_bad_records : int, optional + Maximum number of bad records + allow_jagged_rows : bool, optional + For csv only + allow_quoted_newlines : bool, optional + For csv only + field_delimiter : str, optional + For csv only + quote : str, optional + Quote character for csv only + skip_leading_rows : int, optional + For csv only + + Returns + ------- + dict + A BigQuery job response + + Raises + ------ + JobInsertException + on http/auth failures or error in result """ source_uris = source_uris if isinstance(source_uris, list) \ else [source_uris] @@ -683,7 +917,7 @@ def import_data_from_uris( } } - logging.debug("Creating load job %s" % body) + logger.debug("Creating load job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -700,30 +934,40 @@ def export_data_to_uris( field_delimiter=None, ): """ - Export data from a BigQuery table to cloud storage. 
- Args: - destination_uris: required string or list of strings representing - the uris on cloud storage of the form: - gs://bucket/filename - dataset: required string id of the dataset - table: required string id of the table - job: optional string identifying the job (a unique jobid - is automatically generated if not provided) - compression: optional string - (one of the JOB_COMPRESSION_* constants) - destination_format: optional string - (one of the JOB_DESTINATION_FORMAT_* constants) - print_header: optional boolean - field_delimiter: optional string - - Optional arguments with value None are determined by - BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs - - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Export data from a BigQuery table to cloud storage. Optional arguments + that are not specified are determined by BigQuery as described: + https://developers.google.com/bigquery/docs/reference/v2/jobs + + Parameters + ---------- + destination_urls : Union[str, list] + ``str`` or ``list`` of ``str`` objects representing the URIs on + cloud storage of the form: gs://bucket/filename + dataset : str + String id of the dataset + table : str + String id of the table + job : str, optional + String identifying the job (a unique jobid is automatically + generated if not provided) + compression : str, optional + One of the JOB_COMPRESSION_* constants + destination_format : str, optional + One of the JOB_DESTination_FORMAT_* constants + print_header : bool, optional + Whether or not to print the header + field_delimiter : str, optional + Character separating fields in delimited file + + Returns + ------- + dict + A BigQuery job resource + + Raises + ------ + JobInsertException + On http/auth failures or error in result """ destination_uris = destination_uris \ if isinstance(destination_uris, list) else [destination_uris] @@ -767,7 +1011,7 @@ def export_data_to_uris( } } - logging.info("Creating export job %s" % body) + logger.info("Creating export job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -777,6 +1021,7 @@ def write_to_table( query, dataset=None, table=None, + external_udf_uris=[], allow_large_results=None, use_query_cache=None, priority=None, @@ -785,28 +1030,41 @@ def write_to_table( ): """ Write query result to table. If dataset or table is not provided, - Bigquery will write the result to temporary table. - Args: - query: required BigQuery query string. - dataset: optional string id of the dataset - table: optional string id of the table - allow_large_results: optional boolean - use_query_cache: optional boolean - priority: optional string - (one of the JOB_PRIORITY_* constants) - create_disposition: optional string - (one of the JOB_CREATE_* constants) - write_disposition: optional string - (one of the JOB_WRITE_* constants) - - Optional arguments with value None are determined by - BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs - - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Bigquery will write the result to temporary table. 
Optional arguments + that are not specified are determined by BigQuery as described: + https://developers.google.com/bigquery/docs/reference/v2/jobs + + Parameters + ---------- + query : str + BigQuery query string + dataset : str, optional + String id of the dataset + table : str, optional + String id of the table + external_udf_uris : list, optional + Contains extternal UDF URIs. If given, URIs must be Google Cloud + Storage and have .js extensions. + allow_large_results : bool, optional + Whether or not to allow large results + use_query_cache : bool, optional + Whether or not to use query cache + priority : str, optional + One of the JOB_PRIORITY_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + + Returns + ------- + dict + A BigQuery job resource + + Raises + ------ + JobInsertException + On http/auth failures or error in result """ configuration = { @@ -835,13 +1093,21 @@ def write_to_table( if write_disposition: configuration['writeDisposition'] = write_disposition + configuration['userDefinedFunctionResources'] = [] + for external_udf_uri in external_udf_uris: + configuration['userDefinedFunctionResources'].append( + { + "resourceUri": external_udf_uri + } + ) + body = { "configuration": { 'query': configuration } } - logging.info("Creating write to table job %s" % body) + logger.info("Creating write to table job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -849,18 +1115,27 @@ def write_to_table( def wait_for_job(self, job, interval=5, timeout=60): """ Waits until the job indicated by job_resource is done or has failed - Args: - job: dict, representing a BigQuery job resource - or str, representing a BigQuery job id - interval: optional float polling interval in seconds, default = 5 - timeout: optional float timeout in seconds, default = 60 - Returns: - dict, final state of the job_resource, as described here: - https://developers.google.com/resources/api-libraries/documentation - /bigquery/v2/python/latest/bigquery_v2.jobs.html#get - Raises: - JobExecutingException on http/auth failures or error in result - BigQueryTimeoutException on timeout + + Parameters + ---------- + job : Union[dict, str] + ``dict`` representing a BigQuery job resource, or a ``str`` + representing the BigQuery job id + interval : float, optional + Polling interval in seconds, default = 5 + timeout : float, optional + Timeout in seconds, default = 60 + + Returns + ------- + dict + Final state of the job resouce, as described here: + https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#get + + Raises + ------ + Union[JobExecutingException, BigQueryTimeoutException] + On http/auth failures or timeout """ complete = False job_id = str(job if isinstance(job, @@ -881,21 +1156,37 @@ def wait_for_job(self, job, interval=5, timeout=60): # raise exceptions if timeout if not complete: - logging.error('BigQuery job %s timeout' % job_id) + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_resource - def push_rows(self, dataset, table, rows, insert_id_key=None): + def push_rows(self, dataset, table, rows, insert_id_key=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None): """Upload rows to BigQuery table. - Args: - dataset: the dataset to upload to. - table: the name of the table to insert rows into. 
- rows: list of rows to add to table - insert_id_key: key for insertId in row - - Returns: + Parameters + ---------- + dataset : str + The dataset to upload to + table : str + The name of the table to insert rows into + rows : list + A ``list`` of rows (``dict`` objects) to add to the table + insert_id_key : str, optional + Key for insertId in row + skip_invalid_rows : bool, optional + Insert all valid rows of a request, even if invalid rows exist. + ignore_unknown_values : bool, optional + Accept rows that contain values that do not match the schema. + template_suffix : str, optional + Inserts the rows into an {table}{template_suffix}. + If table {table}{template_suffix} doesn't exist, create from {table}. + + Returns + ------- + Union[bool, dict] bool indicating if insert succeeded or not, or response from BigQuery if swallow_results is set for False. """ @@ -915,6 +1206,15 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): "rows": rows_data } + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + try: response = table_data.insertAll( projectId=self.project_id, @@ -924,7 +1224,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): ).execute() if response.get('insertErrors'): - logging.error('BigQuery insert errors: %s' % response) + logger.error('BigQuery insert errors: %s' % response) if self.swallow_results: return False else: @@ -936,7 +1236,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): return response except HttpError as e: - logging.exception('Problem with BigQuery insertAll') + logger.exception('Problem with BigQuery insertAll') if self.swallow_results: return False else: @@ -952,12 +1252,18 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): def _get_all_tables(self, dataset_id, cache=False): """Retrieve a list of all tables for the dataset. - Args: - dataset_id: the dataset to retrieve table names for. - cache: To use cached value or not. Timeout value - equals CACHE_TIMEOUT. - Returns: - a dictionary of app ids mapped to their table names. + Parameters + ---------- + dataset_id : str + The dataset to retrieve table names for + cache : bool, optional + To use cached value or not (default False). Timeout value equals + CACHE_TIMEOUT. + + Returns + ------- + dict + A ``dict`` of app ids mapped to their table names """ do_fetch = True if cache and self.cache.get(dataset_id): @@ -986,12 +1292,15 @@ def _get_all_tables(self, dataset_id, cache=False): def _parse_table_list_response(self, list_response): """Parse the response received from calling list on tables. - Args: - list_response: The response found by calling list on a BigQuery - table object. + Parameters + ---------- + list_response + The response found by calling list on a BigQuery table object. - Returns: - The dictionary of dates referenced by table names. + Returns + ------- + dict + Dates referenced by table names """ tables = defaultdict(dict) @@ -1022,12 +1331,16 @@ def _parse_table_name(self, table_id): """Parse a table name in the form of appid_YYYY_MM or YYYY_MM_appid and return a tuple consisting of YYYY-MM and the app id. - Args: - table_id: The table id as listed by BigQuery. + Parameters + ---------- + table_id : str + The table id as listed by BigQuery - Returns: - Tuple containing year/month and app id. 
Returns None, None if the - table id cannot be parsed. + Returns + ------- + tuple + (year/month, app id), or (None, None) if the table id cannot be + parsed. """ # Prefix date @@ -1056,13 +1369,19 @@ def _filter_tables_by_time(self, tables, start_time, end_time): """Filter a table dictionary and return table names based on the range of start and end times in unix seconds. - Args: - tables: The dictionary of dates referenced by table names - start_time: The unix time after which records will be fetched. - end_time: The unix time up to which records will be fetched. - - Returns: - A list of table names that are inside the time range. + Parameters + ---------- + tables : dict + Dates referenced by table names + start_time : int + The unix time after which records will be fetched + end_time : int + The unix time up to which records will be fetched + + Returns + ------- + list + Table names that are inside the time range """ return [table_name for (table_name, unix_seconds) in tables.items() @@ -1071,12 +1390,18 @@ def _filter_tables_by_time(self, tables, start_time, end_time): def _in_range(self, start_time, end_time, time): """Indicate if the given time falls inside of the given range. - Args: - start_time: The unix time for the start of the range. - end_time: The unix time for the end of the range. - time: The unix time to check. - - Returns: + Parameters + ---------- + start_time : int + The unix time for the start of the range + end_time : int + The unix time for the end of the range + time : int + The unix time to check + + Returns + ------- + bool True if the time falls within the range, False otherwise. """ @@ -1086,18 +1411,30 @@ def _in_range(self, start_time, end_time, time): time <= start_time <= time + ONE_MONTH or \ time <= end_time <= time + ONE_MONTH - def get_query_results(self, job_id, offset=None, limit=None, page_token=None, timeout=0): - """Execute the query job indicated by the given job id. This is direct mapping to - bigquery api https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults - - Args: - job_id: The job id of the query to check. - offset: The index the result set should start at. - limit: The maximum number of results to retrieve. - page_token: Page token, returned by a previous call, to request the next page of results. - timeout: Timeout in seconds. - Returns: - The query reply. + def get_query_results(self, job_id, offset=None, limit=None, + page_token=None, timeout=0): + """Execute the query job indicated by the given job id. This is direct + mapping to bigquery api + https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + + Parameters + ---------- + job_id : str + The job id of the query to check + offset : optional + The index the result set should start at. + limit : int, optional + The maximum number of results to retrieve. + page_token : optional + Page token, returned by previous call, to request the next page of + results. + timeout : float, optional + Timeout in seconds + + Returns + ------- + out + The query reply """ job_collection = self.bigquery.jobs() @@ -1112,14 +1449,18 @@ def get_query_results(self, job_id, offset=None, limit=None, page_token=None, ti def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. - Args: - row: A single BigQuery row to transform. - schema: The BigQuery table schema to apply to the row, specifically - the list of field dicts. - - Returns: - Dict containing keys that match the schema and values that match - the row. 
+ Parameters + ---------- + row + A single BigQuery row to transform + schema : list + The BigQuery table schema to apply to the row, specifically + the list of field dicts. + + Returns + ------- + dict + Mapping schema to row """ log = {} @@ -1146,7 +1487,7 @@ def _transform_row(self, row, schema): elif col_dict['type'] == 'BOOLEAN': row_value = row_value in ('True', 'true', 'TRUE') - + elif col_dict['type'] == 'TIMESTAMP': row_value = float(row_value) @@ -1158,12 +1499,16 @@ def _recurse_on_row(self, col_dict, nested_value): """Apply the schema specified by the given dict to the nested value by recursing on it. - Args: - col_dict: A dict containing the schema to apply to the nested - value. - nested_value: A value nested in a BigQuery row. - Returns: - Dict or list of dicts from applied schema. + Parameters + ---------- + col_dict : dict + The schema to apply to the nested value. + nested_value : A value nested in a BigQuery row. + + Returns + ------- + Union[dict, list] + ``dict`` or ``list`` of ``dict`` objects from applied schema. """ row_value = None @@ -1182,10 +1527,15 @@ def _recurse_on_row(self, col_dict, nested_value): def _generate_hex_for_uris(self, uris): """Given uris, generate and return hex version of it - Args: - uris: A list containing all uris - Returns: - string of hexed uris + Parameters + ---------- + uris : list + Containing all uris + + Returns + ------- + str + Hexed uris """ return sha256((":".join(uris) + str(time())).encode()).hexdigest() @@ -1218,18 +1568,23 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, access=None): """Create a new BigQuery dataset. - Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset, not the - integer id of the dataset) - friendly_name: optional string providing a human readable name - description: optional longer string providing a description - access: optional object indicating access permissions (see - https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource) - - Returns: - bool indicating if dataset was created or not, or response + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the + referenceID of the dataset, not the integer id of the dataset) + friendly_name: str, optional + A human readable name + description: str, optional + Longer string providing a description + access : list, optional + Indicating access permissions (see + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if dataset was created or not, or response from BigQuery if swallow_results is set for False """ try: @@ -1246,8 +1601,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot create dataset {0}, {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot create dataset {0}, {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1256,8 +1611,10 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, def get_datasets(self): """List all datasets in the project. 
- Returns: - a list of dataset resources + Returns + ------- + list + Dataset resources """ try: datasets = self.bigquery.datasets() @@ -1265,23 +1622,31 @@ def get_datasets(self): result = request.execute() return result.get('datasets', []) except HttpError as e: - logging.error("Cannot list datasets: {0}".format(e)) + logger.error("Cannot list datasets: {0}".format(e)) return None def delete_dataset(self, dataset_id, delete_contents=False): """Delete a BigQuery dataset. - Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset) - delete_contents: forces deletion of the dataset even when the - dataset contains data - Returns: - bool indicating if the delete was successful or not, or response + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the datset with the project (the + referenceId of the dataset) + delete_contents : bool, optional + If True, forces the deletion of the dataset even when the dataset + contains data (Default = False) + + Returns + ------- + Union[bool, dict[ + ool indicating if the delete was successful or not, or response from BigQuery if swallow_results is set for False - Raises: - HttpError 404 when dataset with dataset_id does not exist + Raises + ------- + HttpError + 404 when dataset with dataset_id does not exist """ try: datasets = self.bigquery.datasets() @@ -1294,8 +1659,8 @@ def delete_dataset(self, dataset_id, delete_contents=False): else: return response except HttpError as e: - logging.error('Cannot delete dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot delete dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1307,16 +1672,23 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. - Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset). - friendly_name: an optional descriptive name for the dataset. - description: an optional description of the dataset. - access: an optional object indicating access permissions. - - Returns: - bool indicating if the update was successful or not, or response - from BigQuery if swallow_results is set for False. + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the + referencedId of the dataset) + friendly_name : str, optional + An optional descriptive name for the dataset. + description : str, optional + An optional description of the dataset. + access : list, optional + Indicating access permissions + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if the update was successful or not, or + response from BigQuery if swallow_results is set for False. """ try: datasets = self.bigquery.datasets() @@ -1331,8 +1703,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot update dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot update dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1344,14 +1716,22 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. 
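As a hedged illustration of the dataset calls in this block (the dataset id and descriptions are invented, and `client` is assumed to come from `get_client`):

```python
# Create a dataset, patch a single field, then remove it again. Each call
# returns True/False by default, or the raw API response when the client
# was built with swallow_results=False.
client.create_dataset('analytics_sandbox',
                      friendly_name='Analytics sandbox',
                      description='Scratch space for ad-hoc queries')

client.patch_dataset('analytics_sandbox',
                     description='Scratch space, wiped weekly')

client.delete_dataset('analytics_sandbox', delete_contents=True)
```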
- Args: - dataset_id: required unique string identifying the dataset with the - projedct (the referenceId of the dataset). - friendly_name: an optional descriptive name for the dataset. - description: an optional description of the dataset. - access: an optional object indicating access permissions. - Returns: - bool indicating if the patch was successful or not, or response + Parameters + ---------- + dataset_id : str + Unique string idenfitying the dataset with the project (the + referenceId of the dataset) + friendly_name : str, optional + An optional descriptive name for the dataset. + description : str, optional + An optional description of the dataset. + access : list, optional + Indicating access permissions. + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if the patch was successful or not, or response from BigQuery if swallow_results is set for False. """ try: @@ -1366,8 +1746,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot patch dataset {0}: {1}'.format(dataset_id, - e)) + logger.error('Cannot patch dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1375,17 +1754,24 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, def dataset_resource(self, ref_id, friendly_name=None, description=None, access=None): - """See https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource - - Args: - ref_id: string dataset id (the reference id, not the integer id) - friendly_name: opt string - description: opt string - access: opt list - - Returns: - a dictionary representing a BigQuery dataset resource + """See + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource + + Parameters + ---------- + ref_id : str + Dataset id (the reference id, not the integer id) + friendly_name : str, optional + An optional descriptive name for the dataset + description : str, optional + An optional description for the dataset + access : list, optional + Indicating access permissions + + Returns + ------- + dict + Representing BigQuery dataset resource """ data = { "datasetReference": { @@ -1407,18 +1793,27 @@ def schema_from_record(cls, record): """Given a dict representing a record instance to be inserted into BigQuery, calculate the schema. - Args: - record: dict representing a record to be inserted into big query, - where all keys are strings (representing column names in - the record) and all values are of type int, str, unicode, - float,bool, timestamp or dict. A dict value represents a - record, and must conform to the same restrictions as record - - Returns: - a list representing a BigQuery schema - - Note: results are undefined if a different value types are provided for - a repeated field: E.g. - { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! + Parameters + ---------- + record : dict + representing a record to be inserted into big query, + where all keys are ``str`` objects (representing column names in + the record) and all values are of type ``int``, ``str``, + ``unicode``, ``float``, ``bool``, ``datetime``, or ``dict``. A + ``dict`` value represents a record, and must conform to the same + restrictions as record. + + Returns + ------- + list + BigQuery schema + + Notes + ----- + Results are undefined if a different value type is provided for a + repeated field: E.g. + + >>> { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! 
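A small sketch tying `schema_from_record` to table creation and row insertion; `client` is assumed to come from `get_client`, and the dataset, table, and record contents are placeholders:

```python
record = {'id': 123, 'name': 'alice', 'score': 9.5, 'active': True}

# Derive a BigQuery schema (a list of field dicts) from a sample record,
# create a matching table, then stream the record into it.
schema = client.schema_from_record(record)
client.create_table('dataset', 'events', schema)
client.push_rows('dataset', 'events', [record], insert_id_key='id')
```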
""" + from bigquery.schema_builder import schema_from_record return schema_from_record(record) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1cfa72a..8fc403f 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -1,57 +1,55 @@ -import logging +from logging import getLogger + +logger = getLogger(__name__) def render_query(dataset, tables, select=None, conditions=None, - groupings=None, order_by=None): + groupings=None, having=None, order_by=None): """Render a query that will run over the given tables using the specified parameters. - Args: - dataset: the BigQuery data set to query data from. - tables: the tables in dataset to query. - select: a dictionary of selections for a table. The keys function as - column names and the values function as options to apply to - the select field such as alias and format. For example, - { - 'start_time': { - 'alias': 'StartTime', - 'format': 'INTEGER-FORMAT_UTC_USEC' - } - } - is represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as - StartTime' in a query. Pass None to select all. - conditions: a list of dicts to filter results by. - Each dict should be formatted as the following: - { - 'field': 'foo', - 'type': 'FLOAT', - 'comparators': [ - { - 'condition': '>=', - 'negate': False, - 'value': '1' - } - ] - } - which is rendered as 'foo >= FLOAT('1')' in the query. - groupings: a list of field names to group by. - order_by: a dict with two keys, field and direction. - Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. - - Returns: - a query string. + Parameters + ---------- + dataset : str + The BigQuery dataset to query data from + tables : Union[dict, list] + The table in `dataset` to query. + select : dict, optional + The keys function as column names and the values function as options to + apply to the select field such as alias and format. For example, + select['start_time'] might have the form + {'alias': 'StartTime', 'format': 'INTEGER-FORMAT_UTC_USEC'}, which + would be represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as + StartTime' in a query. Pass `None` to select all. + conditions : list, optional + a ``list`` of ``dict`` objects to filter results by. Each dict should + have the keys 'field', 'type', and 'comparators'. The first two map to + strings representing the field (e.g. 'foo') and type (e.g. 'FLOAT'). + 'comparators' maps to another ``dict`` containing the keys 'condition', + 'negate', and 'value'. + If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, + this example will be rdnered as 'foo >= FLOAT('1')' in the query. + ``list`` of field names to group by + order_by : dict, optional + Keys = {'field', 'direction'}. `dict` should be formatted as + {'field':'TimeStamp, 'direction':'desc'} or similar + + Returns + ------- + str + A rendered query """ if None in (dataset, tables): return None - query = "%s %s %s %s %s" % ( + query = "%s %s %s %s %s %s" % ( _render_select(select), _render_sources(dataset, tables), _render_conditions(conditions), _render_groupings(groupings), - _render_order(order_by), + _render_having(having), + _render_order(order_by) ) return query @@ -60,17 +58,19 @@ def render_query(dataset, tables, select=None, conditions=None, def _render_select(selections): """Render the selection part of a query. - Args: - selections: a dictionary of selections for a table. The - keys function as column names and the values function as - options to apply to the select field such as alias and format. 
- For example {'start_time': {'alias': 'StartTime', 'format': - 'INTEGER-FORMAT_UTC_USEC'}} is represented as - 'SEC_TO_TIMESTAMP(INTEGER(start_time))' in a query. Pass None to - select all. - - Returns: - a string that represents the select part of a query. + Parameters + ---------- + selections : dict + Selections for a table + + Returns + ------- + str + A string for the "select" part of a query + + See Also + -------- + render_query : Further clarification of `selections` dict formatting """ if not selections: @@ -99,15 +99,20 @@ def _render_select(selections): def _format_select(formatter, name): """Modify the query selector by applying any formatters to it. - Args: - formatter: hyphen-delimited formatter string where formatters are - applied inside-out, e.g. the formatter string - SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector - foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). - name: the name of the selector to apply formatters to. - - Returns: - formatted selector. + Parameters + ---------- + formatter : str + Hyphen-delimited formatter string where formatters are + applied inside-out, e.g. the formatter string + SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector + foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). + name: str + The name of the selector to apply formatters to. + + Returns + ------- + str + The formatted selector """ for caster in formatter.split('-'): @@ -125,30 +130,52 @@ def _format_select(formatter, name): def _render_sources(dataset, tables): """Render the source part of a query. - Args: - dataset: the data set to fetch log data from. - tables: the tables to fetch log data from. - - Returns: - a string that represents the from part of a query. + Parameters + ---------- + dataset : str + The data set to fetch log data from. + tables : Union[dict, list] + The tables to fetch log data from + + Returns + ------- + str + A string that represents the "from" part of a query. """ - return "FROM " + ", ".join( - ["[%s.%s]" % (dataset, table) for table in tables]) + if isinstance(tables, dict): + if tables.get('date_range', False): + try: + dataset_table = '.'.join([dataset, tables['table']]) + return "FROM (TABLE_DATE_RANGE([{}], TIMESTAMP('{}'),"\ + " TIMESTAMP('{}'))) ".format(dataset_table, + tables['from_date'], + tables['to_date']) + except KeyError as exp: + logger.warn( + 'Missing parameter %s in selecting sources' % (exp)) + + else: + return "FROM " + ", ".join( + ["[%s.%s]" % (dataset, table) for table in tables]) def _render_conditions(conditions): """Render the conditions part of a query. - Args: - conditions: a list of dictionary items to filter a table. - Each dict should be formatted as {'field': 'start_time', - 'value': {'value': 1, 'negate': False}, 'comparator': '>', - 'type': 'FLOAT'} which is represetned as - 'start_time > FLOAT('1')' in the query. + Parameters + ---------- + conditions : list + A list of dictionay items to filter a table. + + Returns + ------- + str + A string that represents the "where" part of a query - Returns: - a string that represents the where part of a query. + See Also + -------- + render_query : Further clarification of `conditions` formatting. 
""" if not conditions: @@ -162,7 +189,7 @@ def _render_conditions(conditions): comparators = condition.get('comparators') if None in (field, field_type, comparators) or not comparators: - logging.warn('Invalid condition passed in: %s' % condition) + logger.warn('Invalid condition passed in: %s' % condition) continue rendered_conditions.append( @@ -177,14 +204,18 @@ def _render_conditions(conditions): def _render_condition(field, field_type, comparators): """Render a single query condition. - Args: - field: the field the condition applies to. - field_type: the data type of the field. - comparator: the logic operator to use. - value_dicts: a list of value dicts of the form - {'value': 'foo', 'negate': False} - - Returns: + Parameters + ---------- + field : str + The field the condition applies to + field_type : str + The data type of the field. + comparators : array_like + An iterable of logic operators to use. + + Returns + ------- + str a condition string. """ @@ -206,6 +237,15 @@ def _render_condition(field, field_type, comparators): else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "BETWEEN": + if isinstance(value, (tuple, list, set)) and len(value) == 2: + value = ' AND '.join( + sorted([_render_condition_value(v, field_type) + for v in value]) + ) + elif isinstance(value, (tuple, list, set)) and len(value) != 2: + logger.warn('Invalid condition passed in: %s' % condition) + else: value = _render_condition_value(value, field_type) @@ -229,12 +269,17 @@ def _render_condition(field, field_type, comparators): def _render_condition_value(value, field_type): """Render a query condition value. - Args: - value: the value of the condition. - field_type: the data type of the field. - - Returns: - a value string. + Parameters + ---------- + value : Union[bool, int, float, str, datetime] + The value of the condition + field_type : str + The data type of the field + + Returns + ------- + str + A value string. """ # BigQuery cannot cast strings to booleans, convert to ints @@ -242,38 +287,88 @@ def _render_condition_value(value, field_type): value = 1 if value else 0 elif field_type in ("STRING", "INTEGER", "FLOAT"): value = "'%s'" % (value) + elif field_type in ("TIMESTAMP"): + value = "'%s'" % (str(value)) return "%s(%s)" % (field_type, value) -def _render_order(order): - """Render the order by part of a query. +def _render_groupings(fields): + """Render the group by part of a query. - Args: - order: a dictionary with two keys, field and direction. - Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. + Parameters + ---------- + fields : list + A list of fields to group by. - Returns: - a string that represents the order by part of a query. + Returns + ------- + str + A string that represents the "group by" part of a query. """ - if not order or 'field' not in order or 'direction' not in order: - return '' + if not fields: + return "" - return "ORDER BY %s %s" % (order['field'], order['direction']) + return "GROUP BY " + ", ".join(fields) -def _render_groupings(fields): - """Render the group by part of a query. +def _render_having(having_conditions): + """Render the having part of a query. + + Parameters + ---------- + having_conditions : list + A ``list`` of ``dict``s to filter the rows - Args: - fields: a list of fields to group by. + Returns + ------- + str + A string that represents the "having" part of a query. - Returns: - a string that represents the group by part of a query. 
+ See Also + -------- + render_query : Further clarification of `conditions` formatting. """ + if not having_conditions: + return "" - if not fields: + rendered_conditions = [] + + for condition in having_conditions: + field = condition.get('field') + field_type = condition.get('type') + comparators = condition.get('comparators') + + if None in (field, field_type, comparators) or not comparators: + logger.warn('Invalid condition passed in: %s' % condition) + continue + + rendered_conditions.append( + _render_condition(field, field_type, comparators)) + + if not rendered_conditions: return "" - return "GROUP BY " + ", ".join(fields) + return "HAVING %s" % (" AND ".join(rendered_conditions)) + + +def _render_order(order): + """Render the order by part of a query. + + Parameters + ---------- + order : dict + A dictionary with two keys, fields and direction. + Such that the dictionary should be formatted as + {'fields': ['TimeStamp'], 'direction':'desc'}. + + Returns + ------- + str + A string that represents the "order by" part of a query. + """ + + if not order or 'fields' not in order or 'direction' not in order: + return '' + + return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 09084a7..575b390 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -23,13 +23,17 @@ def schema_from_record(record, timestamp_parser=default_timestamp_parser): """Generate a BigQuery schema given an example of a record that is to be inserted into BigQuery. - Args: - record: dict - timestamp_parser: unary function taking a string and return non-NIL if - string represents a date - - Returns: - schema: list + Parameters + ---------- + record : dict + Example of a record that is to be inserted into BigQuery + timestamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Schema: list """ return [describe_field(k, v, timestamp_parser=timestamp_parser) for k, v in list(record.items())] @@ -41,16 +45,25 @@ def describe_field(k, v, timestamp_parser=default_timestamp_parser): element describing that field. Raise errors if invalid value types are provided. - Args: - k: str/unicode, key representing the column - v: str/unicode/int/float/datetime/object - - Returns: - object describing the field - - Raises: - Exception: if invalid value types are provided. - + Parameters + ---------- + k : Union[str, unicode] + Key representing the column + v : Union[str, unicode, int, float, datetime, object] + Value mapped to by `k` + + Returns + ------- + object + Describing the field + + Raises + ------ + Exception + If invalid value types are provided. + + Examples + -------- >>> describe_field("username", "Bob") {"name": "username", "type": "string", "mode": "nullable"} >>> describe_field("users", [{"username": "Bob"}]) @@ -90,9 +103,22 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): one of str/unicode/int/float/datetime/record, where record is a dict containing value which have matching BigQuery types. 
- Returns: - str or None if no matching type could be found - + Parameters + ---------- + o : object + A Python object + time_stamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Union[str, None] + Name of the corresponding BigQuery type for `o`, or None if no type + could be found + + Examples + -------- >>> bigquery_type("abc") "string" >>> bigquery_type(123) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index d8869ee..39bf05b 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2,18 +2,16 @@ import mock import six -from nose.tools import raises - -from apiclient.errors import HttpError from bigquery import client from bigquery.errors import ( JobInsertException, JobExecutingException, BigQueryTimeoutException ) +from googleapiclient.errors import HttpError +from nose.tools import raises class HttpResponse(object): - def __init__(self, status, reason='There was an error'): """ Args: @@ -24,7 +22,6 @@ def __init__(self, status, reason='There was an error'): class TestGetClient(unittest.TestCase): - def setUp(self): client._bq_client = None @@ -50,7 +47,8 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -59,14 +57,18 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + service_account=service_account, private_key=key, readonly=True) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE_READ_ONLY) - self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, + scopes=BIGQUERY_SCOPE_READ_ONLY) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -80,7 +82,8 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -89,22 +92,23 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + service_account=service_account, private_key=key, readonly=False) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) - 
mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') - def test_initialize_key_file(self, mock_open, mock_build, - mock_return_cred): + def test_initialize_key_file(self, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with read/write permissions using a private key file. """ @@ -112,26 +116,28 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key_file = 'key.pem' - key = 'key' - mock_open.return_value.__enter__.return_value.read.return_value = key service_account = 'account' project_id = 'project' mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, + project_id, service_url=mock_service_url, + service_account=service_account, private_key_file=key_file, readonly=False) - mock_open.assert_called_once_with(key_file, 'rb') mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_p12_keyfile.assert_called_once_with(service_account, + key_file, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_p12_keyfile.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -147,7 +153,8 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq json_key_file = 'key.json' @@ -156,16 +163,56 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) project_id = 'project' mock_return_cred.return_value = mock_cred - bq_client = client.get_client(project_id, json_key_file=json_key_file, readonly=False) + bq_client = client.get_client( + project_id, service_url=mock_service_url, + json_key_file=json_key_file, readonly=False) - mock_open.assert_called_once_with(json_key_file, 'r') mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(json_key['client_email'], json_key['private_key'], scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, + 
scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_dict.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def test_initialize_json_key_file_without_project_id(self, mock_open, mock_build, + mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a JSON key file without project_id. + """ + from bigquery.client import BIGQUERY_SCOPE + import json + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_service_url = mock.Mock() + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey', 'project_id': 'project'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client( + service_url=mock_service_url, json_key_file=json_key_file, readonly=False) + + mock_open.assert_called_once_with(json_key_file, 'r') + mock_return_cred.assert_called_once_with() + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_dict.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(json_key['project_id'], bq_client.project_id) + class TestQuery(unittest.TestCase): @@ -1012,6 +1059,7 @@ def setUp(self): self.project_id = 'project' self.dataset_id = 'dataset' self.table_id = 'table' + self.external_udf_uris = ['gs://bucket/external_udf.js'] self.use_query_cache = False self.priority = "INTERACTIVE" self.client = client.BigQueryClient(self.mock_api, @@ -1032,6 +1080,9 @@ def test_write(self): "tableId": self.table_id }, "query": self.query, + "userDefinedFunctionResources": [{ + "resourceUri": self.external_udf_uris[0] + }], "useQueryCache": self.use_query_cache, "priority": self.priority, } @@ -1042,6 +1093,7 @@ def test_write(self): result = self.client.write_to_table(self.query, self.dataset_id, self.table_id, + external_udf_uris=self.external_udf_uris, use_query_cache=False, priority=self.priority) @@ -1588,6 +1640,156 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.return_value.execute.assert_called_with() +class TestUpdateTable(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + self.expiration_time = 1437513693000 + + def test_table_update_failed(self): + """Ensure that if updating 
the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.update.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.update.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.update.return_value.execute.assert_called_with() + + def test_table_update_success(self): + """Ensure that if updating the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.update.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.update.return_value.execute.assert_called_with() + + +class TestPatchTable(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + self.expiration_time = 1437513693000 + + def test_table_patch_failed(self): + """Ensure that if patching the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.patch.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.patch.return_value.execute.assert_called_with() + + def test_table_patch_success(self): + """Ensure that if patching the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + 
self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.patch.return_value.execute.assert_called_with() + + class TestCreateView(unittest.TestCase): def setUp(self): @@ -1942,6 +2144,50 @@ def test_push_success(self): self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) + def test_request_data_with_options(self): + """Ensure that insertAll body has optional property only when + the optional parameter of push_rows passed. + """ + expected_body = self.data.copy() + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=False, + skip_invalid_rows=False) + expected_body['ignoreUnknownValues'] = False + expected_body['skipInvalidRows'] = False + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=True, + skip_invalid_rows=True, + template_suffix='20160428' + ) + expected_body['ignoreUnknownValues'] = True + expected_body['skipInvalidRows'] = True + expected_body['templateSuffix'] = '20160428' + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + class TestGetAllTables(unittest.TestCase): diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 8591c6b..df37a3e 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -80,6 +80,22 @@ def test_no_dataset(self): self.assertEqual(result, 'FROM [.man], [.pig], [.bro]') + def test_tables_in_date_range(self): + """Ensure that render sources can handle tables in DATE RANGE.""" + from bigquery.query_builder import _render_sources + + tables = { + 'date_range': True, + 'from_date': '2015-08-23', + 'to_date': '2015-10-10', + 'table': 'pets_' + } + + result = _render_sources('animals', tables) + + self.assertEqual(result, "FROM (TABLE_DATE_RANGE([animals.pets_], " + "TIMESTAMP('2015-08-23'), TIMESTAMP('2015-10-10'))) ") + class TestRenderConditions(unittest.TestCase): @@ -218,6 +234,42 @@ def test_in_comparator(self): "foobar IN (STRING('n'))))" [len('WHERE '):] .split(' AND ')) + def test_between_comparator(self): + """Ensure that render conditions can handle "BETWEEN" condition.""" + from bigquery.query_builder import _render_conditions + + result = _render_conditions([ + { + 'field': 'foobar', + 'type': 'STRING', + 'comparators': [ + {'condition': 'BETWEEN', 'negate': False, + 'value': ['a', 'b']}, + {'condition': 'BETWEEN', 'negate': False, + 'value': {'c', 'd'}}, + {'condition': 'BETWEEN', 'negate': False, + 'value': ('e', 'f')}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ['h', 'i']}, + {'condition': 'BETWEEN', 'negate': True, + 'value': {'j', 'k'}}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ('l', 'm')} + ] + } + ]) + + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar BETWEEN STRING('a') AND " + "STRING('b') AND foobar BETWEEN STRING('c') " + "AND STRING('d') AND foobar BETWEEN " + "STRING('e') AND STRING('f')) AND (NOT foobar " 
+ "BETWEEN STRING('h') AND STRING('i') AND NOT " + "foobar BETWEEN STRING('j') AND STRING('k') " + "AND NOT foobar BETWEEN STRING('l') AND " + "STRING('m')))" [len('WHERE '):] + .split(' AND ')) + class TestRenderOrder(unittest.TestCase): @@ -225,7 +277,7 @@ def test_order(self): """Ensure that render order can work under expected conditions.""" from bigquery.query_builder import _render_order - result = _render_order({'field': 'foo', 'direction': 'desc'}) + result = _render_order({'fields': ['foo'], 'direction': 'desc'}) self.assertEqual(result, "ORDER BY foo desc") @@ -259,6 +311,35 @@ def test_no_fields(self): self.assertEqual(result, "") +class TestRenderHaving(unittest.TestCase): + + def test_mutliple_fields(self): + """Ensure that render having works with multiple fields.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having([ + { + 'field': 'bar', + 'type': 'STRING', + 'comparators': [ + {'condition': '>=', 'negate': False, 'value': '1'} + ] + } + ]) + + self.assertEqual(result, "HAVING (bar >= STRING('1'))") + + def test_no_fields(self): + """Ensure that render having can work with out any arguments.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having(None) + + self.assertEqual(result, "") + + class TestRenderQuery(unittest.TestCase): def test_full_query(self): @@ -298,13 +379,27 @@ def test_full_query(self): } ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + having=[ + { + 'field': 'status', + 'comparators': [ + { + 'condition': '==', + 'value': 1, + 'negate': False + } + ], + 'type': 'INTEGER' + } + ], + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " - "timestamp, status ORDER BY timestamp desc") + "timestamp, status HAVING (status == INTEGER('1')) " + "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -327,17 +422,18 @@ def test_empty_conditions(self): 'resource': {'alias': 'url'} }, conditions=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] result_select = (result[len('SELECT '):].split('FROM')[0] .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) @@ -363,11 +459,11 @@ def test_incorrect_conditions(self): 'negate': False}, 'compoorattor': '>=', 'type': 'INTEGER'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] 
.split('FROM')[0].strip().split(', ')) @@ -411,7 +507,7 @@ def test_multiple_condition_values(self): 'negate': False}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " @@ -420,7 +516,7 @@ def test_multiple_condition_values(self): "INTEGER('1371556954')) AND " "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " - "STRING('bar'))) ORDER BY timestamp desc") + "STRING('bar'))) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -449,12 +545,12 @@ def test_negated_condition_value(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " - "CONTAINS STRING('foo')) ORDER BY timestamp desc") + "CONTAINS STRING('foo')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -490,14 +586,14 @@ def test_multiple_negated_condition_values(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " - "STRING('bar')) ORDER BY timestamp desc") + "STRING('bar')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -535,7 +631,7 @@ def test_empty_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -573,7 +669,7 @@ def test_incorrect_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -601,11 +697,11 @@ def test_empty_select(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT * FROM [dataset.2013_06_appspot_1] " "WHERE (start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") self.assertEqual(result, expected_query) @@ -631,12 +727,12 @@ def test_no_alias(self): 'negate': False}], 'type': 'INTEGER'} ], - order_by={'field': 'start_time', 'direction': 'desc'}) + order_by={'fields': 
['start_time'], 'direction': 'desc'}) expected_query = ("SELECT status , start_time , resource FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY start_time desc") + "INTEGER('1371556954')) ORDER BY start_time desc") expected_select = (field.strip() for field in expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -674,14 +770,14 @@ def test_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -725,7 +821,7 @@ def test_formatting_duplicate_columns(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " @@ -733,7 +829,7 @@ def test_formatting_duplicate_columns(self): "10) as day, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE " "(start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -771,14 +867,14 @@ def test_sec_to_micro_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "SEC_TO_TIMESTAMP(INTEGER(start_time*1000000)) as " "timestamp, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -812,7 +908,7 @@ def test_no_table_or_dataset(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) self.assertIsNone(result) @@ -829,11 +925,11 @@ def test_empty_groupings(self): 'resource': {'alias': 'url'} }, groupings=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -844,7 +940,6 @@ def test_empty_groupings(self): six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) - def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" from 
bigquery.query_builder import render_query @@ -868,14 +963,14 @@ def test_multi_tables(self): 'type': 'INTEGER'}, ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1], " "[dataset.2013_07_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) GROUP BY timestamp, status " + "INTEGER('1371556954')) GROUP BY timestamp, status " "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) diff --git a/bigquery/version.py b/bigquery/version.py new file mode 100644 index 0000000..0e1a38d --- /dev/null +++ b/bigquery/version.py @@ -0,0 +1 @@ +__version__ = '1.7.0' diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..3f83b08 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,216 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BigQuery-Python.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BigQuery-Python.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..a97fc34 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# BigQuery-Python documentation build configuration file, created by +# sphinx-quickstart on Sat Apr 9 13:11:15 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +#numpydoc_show_class_members = False + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +sys.path.insert(0, os.path.abspath('../')) +import bigquery + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', + 'numpydoc', + 'sphinx.ext.autosummary' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'BigQuery-Python' +copyright = '2016, Tyler Treat' +author = 'Tyler Treat' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = bigquery.__version__ +# The full version, including alpha/beta/rc tags. +release = bigquery.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. 
+# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'BigQuery-Pythondoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'BigQuery-Python.tex', 'BigQuery-Python Documentation', + 'Tyler Treat', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'bigquery-python', 'BigQuery-Python Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'BigQuery-Python', 'BigQuery-Python Documentation', + author, 'BigQuery-Python', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..0708835 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,29 @@ +.. BigQuery-Python documentation master file, created by + sphinx-quickstart on Sat Apr 9 13:11:15 2016. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to BigQuery-Python's documentation! +=========================================== + +Content +------- + +.. 
+.. toctree::
+
+   pages/client
+   pages/query_builder
+   pages/schema_builder
+
+References
+----------
+* `BigQuery-Python Source Code <https://github.com/tylertreat/BigQuery-Python>`_
+* `BigQuery API Reference <https://cloud.google.com/bigquery/docs/reference/v2/>`_
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..2b8c095
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,263 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html       to make standalone HTML files
+	echo.  dirhtml    to make HTML files named index.html in directories
+	echo.  singlehtml to make a single large HTML file
+	echo.  pickle     to make pickle files
+	echo.  json       to make JSON files
+	echo.  htmlhelp   to make HTML files and a HTML help project
+	echo.  qthelp     to make HTML files and a qthelp project
+	echo.  devhelp    to make HTML files and a Devhelp project
+	echo.  epub       to make an epub
+	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  text       to make text files
+	echo.  man        to make manual pages
+	echo.  texinfo    to make Texinfo files
+	echo.  gettext    to make PO message catalogs
+	echo.  changes    to make an overview over all changed/added/deprecated items
+	echo.  xml        to make Docutils-native XML files
+	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
+	echo.  linkcheck  to check all external links for integrity
+	echo.  doctest    to run all doctests embedded in the documentation if enabled
+	echo.  coverage   to run coverage check of the documentation if enabled
+	goto end
+)
+
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+)
+
+
+REM Check if sphinx-build is available and fallback to Python version if any
+%SPHINXBUILD% 1>NUL 2>NUL
+if errorlevel 9009 goto sphinx_python
+goto sphinx_ok
+
+:sphinx_python
+
+set SPHINXBUILD=python -m sphinx.__init__
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+:sphinx_ok
+
+
+if "%1" == "html" (
+	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+)
+
+if "%1" == "dirhtml" (
+	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+)
+
+if "%1" == "singlehtml" (
+	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+	goto end
+)
+
+if "%1" == "pickle" (
+	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the pickle files.
+ goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\BigQuery-Python.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\BigQuery-Python.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. 
+	echo.Testing of coverage in the sources finished, look at the ^
+results in %BUILDDIR%/coverage/python.txt.
+	goto end
+)
+
+if "%1" == "xml" (
+	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The XML files are in %BUILDDIR%/xml.
+	goto end
+)
+
+if "%1" == "pseudoxml" (
+	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
+	goto end
+)
+
+:end
diff --git a/docs/pages/client.rst b/docs/pages/client.rst
new file mode 100644
index 0000000..f21a864
--- /dev/null
+++ b/docs/pages/client.rst
@@ -0,0 +1,13 @@
+.. _client:
+
+client
+======
+
+.. automodule:: bigquery.client
+    :members:
+
+:mod:`BigQueryClient` Class
+---------------------------
+
+.. autoclass:: bigquery.client.BigQueryClient
+    :members:
diff --git a/docs/pages/query_builder.rst b/docs/pages/query_builder.rst
new file mode 100644
index 0000000..4053073
--- /dev/null
+++ b/docs/pages/query_builder.rst
@@ -0,0 +1,7 @@
+.. _query_builder:
+
+query_builder
+=============
+
+.. automodule:: bigquery.query_builder
+    :members:
diff --git a/docs/pages/schema_builder.rst b/docs/pages/schema_builder.rst
new file mode 100644
index 0000000..0d16def
--- /dev/null
+++ b/docs/pages/schema_builder.rst
@@ -0,0 +1,7 @@
+.. _schema_builder:
+
+schema_builder
+==============
+
+.. automodule:: bigquery.schema_builder
+    :members:
diff --git a/setup.py b/setup.py
index 9878d9d..fc1c5de 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,17 @@
+from distutils.util import convert_path
 from setuptools import find_packages
 from setuptools import setup
 
-VERSION = '1.4.1'
+ns = {}
+version_path = convert_path('bigquery/version.py')
+with open(version_path) as version_file:
+    exec(version_file.read(), ns)
 
 setup_args = dict(
     name='BigQuery-Python',
     description='Simple Python client for interacting with Google BigQuery.',
     url='https://github.com/tylertreat/BigQuery-Python',
-    version=VERSION,
+    version=ns['__version__'],
     license='Apache',
     packages=find_packages(),
     include_package_data=True,
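The new setup.py reads the release number from `bigquery/version.py` instead of hard-coding a `VERSION` constant: the file is exec()'d into a scratch dict and `__version__` is pulled out of it, which keeps the version defined in one place without importing the package (and its Google API dependencies) at install time. The version module itself is not part of this diff; a minimal sketch of what it is assumed to contain is shown below, with the concrete value carried over from the removed constant for illustration only.

```python
# bigquery/version.py -- assumed contents; this file is not shown in the diff.
# setup.py exec()s the file and reads __version__ from the resulting namespace,
# so it must remain a plain assignment with no imports or side effects.
__version__ = '1.4.1'  # illustrative value, mirroring the VERSION constant removed above
```

Because the file is executed as text rather than imported, the version string stays readable even before the package's dependencies are installed.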