From 361376e5895e36dce25b4da597717c4e18e4c8ae Mon Sep 17 00:00:00 2001 From: Ivan Toriya Date: Mon, 25 Sep 2023 18:44:44 +0200 Subject: [PATCH 1/4] add impersonate_service_account --- data_diff/dbt_parser.py | 1 + data_diff/sqeleton/databases/bigquery.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/data_diff/dbt_parser.py b/data_diff/dbt_parser.py index 33299124..e3faf0d3 100644 --- a/data_diff/dbt_parser.py +++ b/data_diff/dbt_parser.py @@ -359,6 +359,7 @@ def set_connection(self): "driver": conn_type, "project": credentials.get("project"), "dataset": credentials.get("dataset"), + "impersonate_service_account": credentials.get("impersonate_service_account"), } self.threads = credentials.get("threads") diff --git a/data_diff/sqeleton/databases/bigquery.py b/data_diff/sqeleton/databases/bigquery.py index 2c0a57ca..abcc9d42 100644 --- a/data_diff/sqeleton/databases/bigquery.py +++ b/data_diff/sqeleton/databases/bigquery.py @@ -42,6 +42,12 @@ def import_bigquery_service_account(): return service_account +def import_bigquery_service_account_impersonation(): + from google.auth import impersonated_credentials + + return impersonated_credentials + + class Mixin_MD5(AbstractMixin_MD5): def md5_as_int(self, s: str) -> str: return f"cast(cast( ('0x' || substr(TO_HEX(md5({s})), 18)) as int64) as numeric)" @@ -221,7 +227,14 @@ def __init__(self, project, *, dataset, bigquery_credentials=None, **kw): keyfile, scopes=["https://www.googleapis.com/auth/cloud-platform"], ) - + elif kw.get("impersonate_service_account"): + bigquery_service_account_impersonation = import_bigquery_service_account_impersonation() + credentials = bigquery_service_account_impersonation.Credentials( + source_credentials=credentials, + target_principal=kw["impersonate_service_account"], + target_scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + self._client = bigquery.Client(project=project, credentials=credentials, **kw) self.project = project self.dataset = dataset From ca51e04227c7197078b699bf61379e7e7a63f689 Mon Sep 17 00:00:00 2001 From: Ivan Toriya Date: Mon, 25 Sep 2023 18:46:55 +0200 Subject: [PATCH 2/4] add test --- tests/test_dbt_parser.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_dbt_parser.py b/tests/test_dbt_parser.py index 4e8b20d5..e106917a 100644 --- a/tests/test_dbt_parser.py +++ b/tests/test_dbt_parser.py @@ -269,6 +269,28 @@ def test_set_connection_bigquery_oauth(self): self.assertEqual(mock_self.connection.get("project"), expected_credentials["project"]) self.assertEqual(mock_self.connection.get("dataset"), expected_credentials["dataset"]) + def test_set_connection_bigquery_oauth_sa_impersonation(self): + expected_driver = "bigquery" + expected_credentials = { + "method": "oauth", + "project": "a_project", + "dataset": "a_dataset", + "impersonate_service_account": "a_service_account@yourproject.iam.gserviceaccount.com", + } + mock_self = Mock() + mock_self.get_connection_creds.return_value = (expected_credentials, expected_driver) + + DbtParser.set_connection(mock_self) + + self.assertIsInstance(mock_self.connection, dict) + self.assertEqual(mock_self.connection.get("driver"), expected_driver) + self.assertEqual(mock_self.connection.get("project"), expected_credentials["project"]) + self.assertEqual(mock_self.connection.get("dataset"), expected_credentials["dataset"]) + self.assertEqual( + mock_self.connection.get("impersonate_service_account"), + expected_credentials["impersonate_service_account"], + ) + def test_set_connection_bigquery_svc_account(self): expected_driver = "bigquery" expected_credentials = { From 459fddeb95c3652e6304ea6d6b3825929e7ad32f Mon Sep 17 00:00:00 2001 From: Ivan Toriya Date: Tue, 10 Oct 2023 10:45:18 +0200 Subject: [PATCH 3/4] cancel isort --- data_diff/dbt_parser.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/data_diff/dbt_parser.py b/data_diff/dbt_parser.py index 87905055..23849f83 100644 --- a/data_diff/dbt_parser.py +++ b/data_diff/dbt_parser.py @@ -1,16 +1,17 @@ -import json from argparse import Namespace from collections import defaultdict +import json from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, List, Dict, Tuple, Set, Optional import attrs import yaml -from dbt.config.renderer import ProfileRenderer -from packaging.version import parse as parse_version from pydantic import BaseModel +from packaging.version import parse as parse_version +from dbt.config.renderer import ProfileRenderer from data_diff.dbt_config_validators import ManifestJsonConfig, RunResultsJsonConfig + from data_diff.errors import ( DataDiffDbtBigQueryUnsupportedMethodError, DataDiffDbtConnectionNotImplementedError, @@ -24,7 +25,9 @@ DataDiffDbtSnowflakeSetConnectionError, DataDiffSimpleSelectNotFound, ) -from data_diff.utils import get_from_dict_with_raise, getLogger + +from data_diff.utils import getLogger, get_from_dict_with_raise + logger = getLogger(__name__) From 8055000342bc2bd1cb53a5b89a00059b86ff6b33 Mon Sep 17 00:00:00 2001 From: Dan Lawin Date: Fri, 13 Oct 2023 15:36:00 -0600 Subject: [PATCH 4/4] alternate project/dataset keys --- data_diff/dbt_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_diff/dbt_parser.py b/data_diff/dbt_parser.py index 23849f83..5ca183cd 100644 --- a/data_diff/dbt_parser.py +++ b/data_diff/dbt_parser.py @@ -376,8 +376,8 @@ def set_connection(self): conn_info = { "driver": conn_type, - "project": credentials.get("project"), - "dataset": credentials.get("dataset"), + "project": credentials.get("project") or credentials.get("database"), + "dataset": credentials.get("dataset") or credentials.get("schema"), "impersonate_service_account": credentials.get("impersonate_service_account"), }