diff --git a/README.md b/README.md index f5c931f..f4e2f66 100644 --- a/README.md +++ b/README.md @@ -99,13 +99,32 @@ conditions = [ } ] +grouping = ['Timestamp'] + +having = [ + { + 'field': 'Timestamp', + 'type': 'INTEGER', + 'comparators': [ + { + 'condition': '==', + 'negate': False, + 'value': 1399478981 + } + ] + } +] + +order_by ={'fields': ['Timestamp'], 'direction': 'desc'} + query = render_query( 'dataset', ['table'], select=selects, conditions=conditions, - groupings=['Timestamp'], - order_by={'field': 'Timestamp', 'direction': 'desc'} + groupings=grouping, + having=having, + order_by=order_by ) job_id, _ = client.query(query) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1cfa72a..fc6d90c 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -2,7 +2,7 @@ def render_query(dataset, tables, select=None, conditions=None, - groupings=None, order_by=None): + groupings=None, having=None, order_by=None): """Render a query that will run over the given tables using the specified parameters. @@ -46,12 +46,13 @@ def render_query(dataset, tables, select=None, conditions=None, if None in (dataset, tables): return None - query = "%s %s %s %s %s" % ( + query = "%s %s %s %s %s %s" % ( _render_select(select), _render_sources(dataset, tables), _render_conditions(conditions), _render_groupings(groupings), - _render_order(order_by), + _render_having(having), + _render_order(order_by) ) return query @@ -133,8 +134,21 @@ def _render_sources(dataset, tables): a string that represents the from part of a query. """ - return "FROM " + ", ".join( - ["[%s.%s]" % (dataset, table) for table in tables]) + if isinstance(tables, dict): + if tables.get('date_range', False): + try: + dataset_table = '.'.join([dataset, tables['table']]) + return "FROM (TABLE_DATE_RANGE([{}], TIMESTAMP('{}'),"\ + " TIMESTAMP('{}'))) ".format(dataset_table, + tables['from_date'], + tables['to_date']) + except KeyError as exp: + logging.warn('Missing parameter %s in selecting sources' % + (exp)) + + else: + return "FROM " + ", ".join( + ["[%s.%s]" % (dataset, table) for table in tables]) def _render_conditions(conditions): @@ -206,6 +220,15 @@ def _render_condition(field, field_type, comparators): else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "BETWEEN": + if isinstance(value, (tuple, list, set)) and len(value) == 2: + value = ' AND '.join( + sorted([_render_condition_value(v, field_type) + for v in value]) + ) + elif isinstance(value, (tuple, list, set)) and len(value) != 2: + logging.warn('Invalid condition passed in: %s' % condition) + else: value = _render_condition_value(value, field_type) @@ -242,38 +265,76 @@ def _render_condition_value(value, field_type): value = 1 if value else 0 elif field_type in ("STRING", "INTEGER", "FLOAT"): value = "'%s'" % (value) + elif field_type in ("TIMESTAMP"): + value = "'%s'" % (str(value)) return "%s(%s)" % (field_type, value) -def _render_order(order): - """Render the order by part of a query. +def _render_groupings(fields): + """Render the group by part of a query. Args: - order: a dictionary with two keys, field and direction. - Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. + fields: a list of fields to group by. Returns: - a string that represents the order by part of a query. + a string that represents the group by part of a query. """ - if not order or 'field' not in order or 'direction' not in order: - return '' + if not fields: + return "" - return "ORDER BY %s %s" % (order['field'], order['direction']) + return "GROUP BY " + ", ".join(fields) -def _render_groupings(fields): - """Render the group by part of a query. +def _render_having(having_conditions): + """Render the having part of a query. Args: - fields: a list of fields to group by. + conditions: a list of dictionary items to filter the rows. + Each dict should be formatted as {'field': 'start_time', + 'value': {'value': 1, 'negate': False}, 'comparator': '>', + 'type': 'FLOAT'} which is represetned as + 'start_time > FLOAT('1')' in the query. Returns: - a string that represents the group by part of a query. + a string that represents the having part of a query. """ + if not having_conditions: + return "" - if not fields: + rendered_conditions = [] + + for condition in having_conditions: + field = condition.get('field') + field_type = condition.get('type') + comparators = condition.get('comparators') + + if None in (field, field_type, comparators) or not comparators: + logging.warn('Invalid condition passed in: %s' % condition) + continue + + rendered_conditions.append( + _render_condition(field, field_type, comparators)) + + if not rendered_conditions: return "" - return "GROUP BY " + ", ".join(fields) + return "HAVING %s" % (" AND ".join(rendered_conditions)) + + +def _render_order(order): + """Render the order by part of a query. + + Args: + order: a dictionary with two keys, fields and direction. + Such that the dictionary should be formatted as + {'fields': ['TimeStamp'], 'direction':'desc'}. + + Returns: + a string that represents the order by part of a query. + """ + + if not order or 'fields' not in order or 'direction' not in order: + return '' + + return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 8591c6b..df37a3e 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -80,6 +80,22 @@ def test_no_dataset(self): self.assertEqual(result, 'FROM [.man], [.pig], [.bro]') + def test_tables_in_date_range(self): + """Ensure that render sources can handle tables in DATE RANGE.""" + from bigquery.query_builder import _render_sources + + tables = { + 'date_range': True, + 'from_date': '2015-08-23', + 'to_date': '2015-10-10', + 'table': 'pets_' + } + + result = _render_sources('animals', tables) + + self.assertEqual(result, "FROM (TABLE_DATE_RANGE([animals.pets_], " + "TIMESTAMP('2015-08-23'), TIMESTAMP('2015-10-10'))) ") + class TestRenderConditions(unittest.TestCase): @@ -218,6 +234,42 @@ def test_in_comparator(self): "foobar IN (STRING('n'))))" [len('WHERE '):] .split(' AND ')) + def test_between_comparator(self): + """Ensure that render conditions can handle "BETWEEN" condition.""" + from bigquery.query_builder import _render_conditions + + result = _render_conditions([ + { + 'field': 'foobar', + 'type': 'STRING', + 'comparators': [ + {'condition': 'BETWEEN', 'negate': False, + 'value': ['a', 'b']}, + {'condition': 'BETWEEN', 'negate': False, + 'value': {'c', 'd'}}, + {'condition': 'BETWEEN', 'negate': False, + 'value': ('e', 'f')}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ['h', 'i']}, + {'condition': 'BETWEEN', 'negate': True, + 'value': {'j', 'k'}}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ('l', 'm')} + ] + } + ]) + + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar BETWEEN STRING('a') AND " + "STRING('b') AND foobar BETWEEN STRING('c') " + "AND STRING('d') AND foobar BETWEEN " + "STRING('e') AND STRING('f')) AND (NOT foobar " + "BETWEEN STRING('h') AND STRING('i') AND NOT " + "foobar BETWEEN STRING('j') AND STRING('k') " + "AND NOT foobar BETWEEN STRING('l') AND " + "STRING('m')))" [len('WHERE '):] + .split(' AND ')) + class TestRenderOrder(unittest.TestCase): @@ -225,7 +277,7 @@ def test_order(self): """Ensure that render order can work under expected conditions.""" from bigquery.query_builder import _render_order - result = _render_order({'field': 'foo', 'direction': 'desc'}) + result = _render_order({'fields': ['foo'], 'direction': 'desc'}) self.assertEqual(result, "ORDER BY foo desc") @@ -259,6 +311,35 @@ def test_no_fields(self): self.assertEqual(result, "") +class TestRenderHaving(unittest.TestCase): + + def test_mutliple_fields(self): + """Ensure that render having works with multiple fields.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having([ + { + 'field': 'bar', + 'type': 'STRING', + 'comparators': [ + {'condition': '>=', 'negate': False, 'value': '1'} + ] + } + ]) + + self.assertEqual(result, "HAVING (bar >= STRING('1'))") + + def test_no_fields(self): + """Ensure that render having can work with out any arguments.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having(None) + + self.assertEqual(result, "") + + class TestRenderQuery(unittest.TestCase): def test_full_query(self): @@ -298,13 +379,27 @@ def test_full_query(self): } ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + having=[ + { + 'field': 'status', + 'comparators': [ + { + 'condition': '==', + 'value': 1, + 'negate': False + } + ], + 'type': 'INTEGER' + } + ], + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " - "timestamp, status ORDER BY timestamp desc") + "timestamp, status HAVING (status == INTEGER('1')) " + "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -327,17 +422,18 @@ def test_empty_conditions(self): 'resource': {'alias': 'url'} }, conditions=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] result_select = (result[len('SELECT '):].split('FROM')[0] .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) @@ -363,11 +459,11 @@ def test_incorrect_conditions(self): 'negate': False}, 'compoorattor': '>=', 'type': 'INTEGER'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -411,7 +507,7 @@ def test_multiple_condition_values(self): 'negate': False}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " @@ -420,7 +516,7 @@ def test_multiple_condition_values(self): "INTEGER('1371556954')) AND " "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " - "STRING('bar'))) ORDER BY timestamp desc") + "STRING('bar'))) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -449,12 +545,12 @@ def test_negated_condition_value(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " - "CONTAINS STRING('foo')) ORDER BY timestamp desc") + "CONTAINS STRING('foo')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -490,14 +586,14 @@ def test_multiple_negated_condition_values(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " - "STRING('bar')) ORDER BY timestamp desc") + "STRING('bar')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -535,7 +631,7 @@ def test_empty_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -573,7 +669,7 @@ def test_incorrect_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -601,11 +697,11 @@ def test_empty_select(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT * FROM [dataset.2013_06_appspot_1] " "WHERE (start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") self.assertEqual(result, expected_query) @@ -631,12 +727,12 @@ def test_no_alias(self): 'negate': False}], 'type': 'INTEGER'} ], - order_by={'field': 'start_time', 'direction': 'desc'}) + order_by={'fields': ['start_time'], 'direction': 'desc'}) expected_query = ("SELECT status , start_time , resource FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY start_time desc") + "INTEGER('1371556954')) ORDER BY start_time desc") expected_select = (field.strip() for field in expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -674,14 +770,14 @@ def test_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -725,7 +821,7 @@ def test_formatting_duplicate_columns(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " @@ -733,7 +829,7 @@ def test_formatting_duplicate_columns(self): "10) as day, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE " "(start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -771,14 +867,14 @@ def test_sec_to_micro_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "SEC_TO_TIMESTAMP(INTEGER(start_time*1000000)) as " "timestamp, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -812,7 +908,7 @@ def test_no_table_or_dataset(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) self.assertIsNone(result) @@ -829,11 +925,11 @@ def test_empty_groupings(self): 'resource': {'alias': 'url'} }, groupings=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -844,7 +940,6 @@ def test_empty_groupings(self): six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) - def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" from bigquery.query_builder import render_query @@ -868,14 +963,14 @@ def test_multi_tables(self): 'type': 'INTEGER'}, ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1], " "[dataset.2013_07_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) GROUP BY timestamp, status " + "INTEGER('1371556954')) GROUP BY timestamp, status " "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', '))