From b3084ccd91330d49a299abf47586aa5319bf5492 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Wed, 25 Oct 2023 16:57:32 +0200 Subject: [PATCH 1/3] Remove unused code: random sampling --- data_diff/abcs/mixins.py | 18 ------------------ data_diff/databases/base.py | 11 ----------- data_diff/databases/duckdb.py | 9 +-------- 3 files changed, 1 insertion(+), 37 deletions(-) diff --git a/data_diff/abcs/mixins.py b/data_diff/abcs/mixins.py index e597f480..72f49c55 100644 --- a/data_diff/abcs/mixins.py +++ b/data_diff/abcs/mixins.py @@ -141,24 +141,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: """ -@attrs.define(frozen=False) -class AbstractMixin_RandomSample(AbstractMixin): - @abstractmethod - def random_sample_n(self, tbl: str, size: int) -> str: - """Take a random sample of the given size, i.e. return 'size' amount of rows""" - - @abstractmethod - def random_sample_ratio_approx(self, tbl: str, ratio: float) -> str: - """Take a random sample of the approximate size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows - - i.e. the actual mount of rows returned may vary by standard deviation. 
- """ - - # def random_sample_ratio(self, table: ITable, ratio: float): - # """Take a random sample of the size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows - # """ - - @attrs.define(frozen=False) class AbstractMixin_TimeTravel(AbstractMixin): @abstractmethod diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index 66e2e26a..8073e1ac 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -77,7 +77,6 @@ from data_diff.abcs.mixins import AbstractMixin_TimeTravel, Compilable from data_diff.abcs.mixins import ( AbstractMixin_Schema, - AbstractMixin_RandomSample, AbstractMixin_NormalizeValue, AbstractMixin_OptimizerHints, ) @@ -218,16 +217,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: ) -@attrs.define(frozen=False) -class Mixin_RandomSample(AbstractMixin_RandomSample): - def random_sample_n(self, tbl: ITable, size: int) -> ITable: - # TODO use a more efficient algorithm, when the table count is known - return tbl.order_by(Random()).limit(size) - - def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable: - return tbl.where(Random() < ratio) - - @attrs.define(frozen=False) class Mixin_OptimizerHints(AbstractMixin_OptimizerHints): def optimizer_hints(self, hints: str) -> str: diff --git a/data_diff/databases/duckdb.py b/data_diff/databases/duckdb.py index 12873358..25a22d0a 100644 --- a/data_diff/databases/duckdb.py +++ b/data_diff/databases/duckdb.py @@ -20,7 +20,6 @@ from data_diff.abcs.mixins import ( AbstractMixin_MD5, AbstractMixin_NormalizeValue, - AbstractMixin_RandomSample, ) from data_diff.databases.base import ( Database, @@ -44,7 +43,7 @@ def import_duckdb(): @attrs.define(frozen=False) -class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_RandomSample): +class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "DuckDB" ROUNDS_ON_PREC_LOSS = 
False SUPPORTS_PRIMARY_KEY = True @@ -120,12 +119,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str: def normalize_boolean(self, value: str, _coltype: Boolean) -> str: return self.to_string(f"{value}::INTEGER") - def random_sample_n(self, tbl: ITable, size: int) -> ITable: - return code("SELECT * FROM ({tbl}) USING SAMPLE {size};", tbl=tbl, size=size) - - def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable: - return code("SELECT * FROM ({tbl}) USING SAMPLE {percent}%;", tbl=tbl, percent=int(100 * ratio)) - @attrs.define(frozen=False, init=False, kw_only=True) class DuckDB(Database): From 1b99100440e15c4bc72fdf73b50efe19d6df96cd Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Wed, 25 Oct 2023 16:59:37 +0200 Subject: [PATCH 2/3] Remove unused code: time travelling --- data_diff/abcs/mixins.py | 23 ----------------------- data_diff/databases/base.py | 15 +-------------- data_diff/databases/bigquery.py | 30 +----------------------------- data_diff/databases/snowflake.py | 29 +---------------------------- data_diff/queries/ast_classes.py | 29 ----------------------------- 5 files changed, 3 insertions(+), 123 deletions(-) diff --git a/data_diff/abcs/mixins.py b/data_diff/abcs/mixins.py index 72f49c55..49fd1df2 100644 --- a/data_diff/abcs/mixins.py +++ b/data_diff/abcs/mixins.py @@ -141,29 +141,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: """ -@attrs.define(frozen=False) -class AbstractMixin_TimeTravel(AbstractMixin): - @abstractmethod - def time_travel( - self, - table: Compilable, - before: bool = False, - timestamp: Compilable = None, - offset: Compilable = None, - statement: Compilable = None, - ) -> Compilable: - """Selects historical data from a table - - Parameters: - table - The name of the table whose history we're querying - timestamp - A constant timestamp - offset - the time 'offset' seconds before now - statement - identifier for statement, e.g. 
query ID - - Must specify exactly one of `timestamp`, `offset` or `statement`. - """ - - @attrs.define(frozen=False) class AbstractMixin_OptimizerHints(AbstractMixin): @abstractmethod diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index 8073e1ac..e97c08d3 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -48,7 +48,6 @@ TableAlias, TableOp, TablePath, - TimeTravel, TruncateTable, UnaryOp, WhenThen, @@ -74,7 +73,7 @@ Boolean, JSON, ) -from data_diff.abcs.mixins import AbstractMixin_TimeTravel, Compilable +from data_diff.abcs.mixins import Compilable from data_diff.abcs.mixins import ( AbstractMixin_Schema, AbstractMixin_NormalizeValue, @@ -327,8 +326,6 @@ def render_compilable(self, c: Compiler, elem: Compilable) -> str: return self.render_explain(c, elem) elif isinstance(elem, CurrentTimestamp): return self.render_currenttimestamp(c, elem) - elif isinstance(elem, TimeTravel): - return self.render_timetravel(c, elem) elif isinstance(elem, CreateTable): return self.render_createtable(c, elem) elif isinstance(elem, DropTable): @@ -605,16 +602,6 @@ def render_explain(self, c: Compiler, elem: Explain) -> str: def render_currenttimestamp(self, c: Compiler, elem: CurrentTimestamp) -> str: return self.current_timestamp() - def render_timetravel(self, c: Compiler, elem: TimeTravel) -> str: - assert isinstance(c, AbstractMixin_TimeTravel) - return self.compile( - c, - # TODO: why is it c.? why not self? time-trvelling is the dialect's thing, isnt't it? 
- c.time_travel( - elem.table, before=elem.before, timestamp=elem.timestamp, offset=elem.offset, statement=elem.statement - ), - ) - def render_createtable(self, c: Compiler, elem: CreateTable) -> str: ne = "IF NOT EXISTS " if elem.if_not_exists else "" if elem.source_table: diff --git a/data_diff/databases/bigquery.py b/data_diff/databases/bigquery.py index 52ef6f9f..a1bc33cf 100644 --- a/data_diff/databases/bigquery.py +++ b/data_diff/databases/bigquery.py @@ -24,7 +24,6 @@ AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema, - AbstractMixin_TimeTravel, ) from data_diff.abcs.compiler import Compilable from data_diff.queries.api import this, table, SKIP, code @@ -63,9 +62,7 @@ def import_bigquery_service_account_impersonation(): @attrs.define(frozen=False) -class Dialect( - BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_TimeTravel -): +class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "BigQuery" ROUNDS_ON_PREC_LOSS = False # Technically BigQuery doesn't allow implicit rounding or truncation TYPE_CLASSES = { @@ -197,31 +194,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: .select(this.table_name) ) - def time_travel( - self, - table: Compilable, - before: bool = False, - timestamp: Compilable = None, - offset: Compilable = None, - statement: Compilable = None, - ) -> Compilable: - if before: - raise NotImplementedError("before=True not supported for BigQuery time-travel") - - if statement is not None: - raise NotImplementedError("BigQuery time-travel doesn't support querying by statement id") - - if timestamp is not None: - assert offset is None - return code("{table} FOR SYSTEM_TIME AS OF {timestamp}", table=table, timestamp=timestamp) - - assert offset is not None - return code( - "{table} FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL {offset} HOUR);", - table=table, - offset=offset, 
- ) - @attrs.define(frozen=False, init=False, kw_only=True) class BigQuery(Database): diff --git a/data_diff/databases/snowflake.py b/data_diff/databases/snowflake.py index b80845e3..63a7f3e2 100644 --- a/data_diff/databases/snowflake.py +++ b/data_diff/databases/snowflake.py @@ -19,7 +19,6 @@ AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema, - AbstractMixin_TimeTravel, ) from data_diff.abcs.compiler import Compilable from data_diff.queries.api import table, this, SKIP, code @@ -43,9 +42,7 @@ def import_snowflake(): return snowflake, serialization, default_backend -class Dialect( - BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_TimeTravel -): +class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "Snowflake" ROUNDS_ON_PREC_LOSS = False TYPE_CLASSES = { @@ -117,30 +114,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: .select(table_name=this.TABLE_NAME) ) - def time_travel( - self, - table: Compilable, - before: bool = False, - timestamp: Compilable = None, - offset: Compilable = None, - statement: Compilable = None, - ) -> Compilable: - at_or_before = "AT" if before else "BEFORE" - if timestamp is not None: - assert offset is None and statement is None - key = "timestamp" - value = timestamp - elif offset is not None: - assert statement is None - key = "offset" - value = offset - else: - assert statement is not None - key = "statement" - value = statement - - return code(f"{{table}} {at_or_before}({key} => {{value}})", table=table, value=value) - @attrs.define(frozen=False, init=False, kw_only=True) class Snowflake(Database): diff --git a/data_diff/queries/ast_classes.py b/data_diff/queries/ast_classes.py index 53e83e45..9c140a5f 100644 --- a/data_diff/queries/ast_classes.py +++ b/data_diff/queries/ast_classes.py @@ -457,26 +457,6 @@ def insert_expr(self, expr: Expr): expr = expr.select() return 
InsertToTable(self, expr) - def time_travel( - self, *, before: bool = False, timestamp: datetime = None, offset: int = None, statement: str = None - ) -> Compilable: - """Selects historical data from the table - - Parameters: - before: If false, inclusive of the specified point in time. - If True, only return the time before it. (at/before) - timestamp: A constant timestamp - offset: the time 'offset' seconds before now - statement: identifier for statement, e.g. query ID - - Must specify exactly one of `timestamp`, `offset` or `statement`. - """ - if sum(int(i is not None) for i in (timestamp, offset, statement)) != 1: - raise ValueError("Must specify exactly one of `timestamp`, `offset` or `statement`.") - - if timestamp is not None: - assert offset is None and statement is None - @attrs.define(frozen=True, eq=False) class TableAlias(ExprNode, ITable): @@ -752,15 +732,6 @@ def type(self) -> Optional[type]: return datetime -@attrs.define(frozen=True, eq=False) -class TimeTravel(ITable): # TODO: Unused? - table: TablePath - before: bool = False - timestamp: Optional[datetime] = None - offset: Optional[int] = None - statement: Optional[str] = None - - # DDL From 93c128b7713ab3db8c319b82bc1886d5b9053e35 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Wed, 25 Oct 2023 17:09:19 +0200 Subject: [PATCH 3/3] Remove unused code: schema fetching via dialects The schemas are currently fetched via `Database.select_table_schema()` & `Database.query_table_schema()`, so this dialect-level code is unused. It is also questionable whether schema fetching should be done via dialects at all: some databases, such as Databricks, provide an API (e.g. an HTTP API) to fetch the database metadata. So it belongs to the database, not to dialects. 
--- data_diff/abcs/mixins.py | 19 ---------------- data_diff/databases/base.py | 25 -------------------- data_diff/databases/bigquery.py | 14 +----------- data_diff/databases/duckdb.py | 6 ++--- data_diff/databases/mssql.py | 2 -- data_diff/databases/mysql.py | 2 -- data_diff/databases/oracle.py | 15 +----------- data_diff/databases/postgresql.py | 4 ++-- data_diff/databases/presto.py | 3 +-- data_diff/databases/snowflake.py | 20 +--------------- data_diff/databases/vertica.py | 19 ++-------------- tests/test_database.py | 38 ------------------------------- 12 files changed, 10 insertions(+), 157 deletions(-) diff --git a/data_diff/abcs/mixins.py b/data_diff/abcs/mixins.py index 49fd1df2..3bc566e5 100644 --- a/data_diff/abcs/mixins.py +++ b/data_diff/abcs/mixins.py @@ -122,25 +122,6 @@ def md5_as_int(self, s: str) -> str: "Provide SQL for computing md5 and returning an int" -@attrs.define(frozen=False) -class AbstractMixin_Schema(AbstractMixin): - """Methods for querying the database schema - - TODO: Move AbstractDatabase.query_table_schema() and friends over here - """ - - def table_information(self) -> Compilable: - "Query to return a table of schema information about existing tables" - raise NotImplementedError() - - @abstractmethod - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - """Query to select the list of tables in the schema. (query return type: table[str]) - - If 'like' is specified, the value is applied to the table name, using the 'like' operator. 
- """ - - @attrs.define(frozen=False) class AbstractMixin_OptimizerHints(AbstractMixin): @abstractmethod diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index e97c08d3..1e7fb332 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -75,7 +75,6 @@ ) from data_diff.abcs.mixins import Compilable from data_diff.abcs.mixins import ( - AbstractMixin_Schema, AbstractMixin_NormalizeValue, AbstractMixin_OptimizerHints, ) @@ -199,23 +198,6 @@ def apply_query(callback: Callable[[str], Any], sql_code: Union[str, ThreadLocal return callback(sql_code) -@attrs.define(frozen=False) -class Mixin_Schema(AbstractMixin_Schema): - def table_information(self) -> Compilable: - return table("information_schema", "tables") - - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - return ( - self.table_information() - .where( - this.table_schema == table_schema, - this.table_name.like(like) if like is not None else SKIP, - this.table_type == "BASE TABLE", - ) - .select(this.table_name) - ) - - @attrs.define(frozen=False) class Mixin_OptimizerHints(AbstractMixin_OptimizerHints): def optimizer_hints(self, hints: str) -> str: @@ -1019,10 +1001,6 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe assert col_name in col_dict col_dict[col_name] = String_VaryingAlphanum() - # @lru_cache() - # def get_table_schema(self, path: DbPath) -> Dict[str, ColType]: - # return self.query_table_schema(path) - def _normalize_table_path(self, path: DbPath) -> DbPath: if len(path) == 1: return self.default_schema, path[0] @@ -1056,9 +1034,6 @@ def close(self): self.is_closed = True return super().close() - def list_tables(self, tables_like, schema=None): - return self.query(self.dialect.list_tables(schema or self.default_schema, tables_like)) - @property @abstractmethod def dialect(self) -> BaseDialect: diff --git a/data_diff/databases/bigquery.py b/data_diff/databases/bigquery.py index a1bc33cf..02e19323 
100644 --- a/data_diff/databases/bigquery.py +++ b/data_diff/databases/bigquery.py @@ -23,7 +23,6 @@ from data_diff.abcs.mixins import ( AbstractMixin_MD5, AbstractMixin_NormalizeValue, - AbstractMixin_Schema, ) from data_diff.abcs.compiler import Compilable from data_diff.queries.api import this, table, SKIP, code @@ -62,7 +61,7 @@ def import_bigquery_service_account_impersonation(): @attrs.define(frozen=False) -class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): +class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "BigQuery" ROUNDS_ON_PREC_LOSS = False # Technically BigQuery doesn't allow implicit rounding or truncation TYPE_CLASSES = { @@ -183,17 +182,6 @@ def normalize_struct(self, value: str, _coltype: Struct) -> str: # match on both sides: i.e. have properly ordered keys, same spacing, same quotes, etc. return f"to_json_string({value})" - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - return ( - table(table_schema, "INFORMATION_SCHEMA", "TABLES") - .where( - this.table_schema == table_schema, - this.table_name.like(like) if like is not None else SKIP, - this.table_type == "BASE TABLE", - ) - .select(this.table_name) - ) - @attrs.define(frozen=False, init=False, kw_only=True) class BigQuery(Database): diff --git a/data_diff/databases/duckdb.py b/data_diff/databases/duckdb.py index 25a22d0a..43edcd3f 100644 --- a/data_diff/databases/duckdb.py +++ b/data_diff/databases/duckdb.py @@ -30,9 +30,7 @@ TIMESTAMP_PRECISION_POS, CHECKSUM_OFFSET, ) -from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, Mixin_Schema -from data_diff.queries.ast_classes import ITable -from data_diff.queries.api import code +from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS @import_helper("duckdb") @@ -43,7 +41,7 @@ def import_duckdb(): @attrs.define(frozen=False) -class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, 
AbstractMixin_NormalizeValue): +class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "DuckDB" ROUNDS_ON_PREC_LOSS = False SUPPORTS_PRIMARY_KEY = True diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index 53986a61..1ada701e 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -13,7 +13,6 @@ ConnectError, BaseDialect, ) -from data_diff.databases.base import Mixin_Schema from data_diff.abcs.database_types import ( JSON, NumericType, @@ -40,7 +39,6 @@ def import_mssql(): @attrs.define(frozen=False) class Dialect( BaseDialect, - Mixin_Schema, Mixin_OptimizerHints, AbstractMixin_MD5, AbstractMixin_NormalizeValue, diff --git a/data_diff/databases/mysql.py b/data_diff/databases/mysql.py index d0072267..6b11068b 100644 --- a/data_diff/databases/mysql.py +++ b/data_diff/databases/mysql.py @@ -31,7 +31,6 @@ CHECKSUM_HEXDIGITS, TIMESTAMP_PRECISION_POS, CHECKSUM_OFFSET, - Mixin_Schema, ) @@ -45,7 +44,6 @@ def import_mysql(): @attrs.define(frozen=False) class Dialect( BaseDialect, - Mixin_Schema, Mixin_OptimizerHints, AbstractMixin_MD5, AbstractMixin_NormalizeValue, diff --git a/data_diff/databases/oracle.py b/data_diff/databases/oracle.py index 5574998a..bcba374d 100644 --- a/data_diff/databases/oracle.py +++ b/data_diff/databases/oracle.py @@ -16,9 +16,7 @@ TimestampTZ, FractionalType, ) -from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema -from data_diff.abcs.compiler import Compilable -from data_diff.queries.api import this, table, SKIP +from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue from data_diff.databases.base import ( BaseDialect, Mixin_OptimizerHints, @@ -46,7 +44,6 @@ def import_oracle(): class Dialect( BaseDialect, Mixin_OptimizerHints, - AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue, ): @@ -162,16 +159,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> 
str: format_str += "0." + "9" * (coltype.precision - 1) + "0" return f"to_char({value}, '{format_str}')" - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - return ( - table("ALL_TABLES") - .where( - this.OWNER == table_schema, - this.TABLE_NAME.like(like) if like is not None else SKIP, - ) - .select(table_name=this.TABLE_NAME) - ) - @attrs.define(frozen=False, init=False, kw_only=True) class Oracle(ThreadedDatabase): diff --git a/data_diff/databases/postgresql.py b/data_diff/databases/postgresql.py index 22403574..6bc3d488 100644 --- a/data_diff/databases/postgresql.py +++ b/data_diff/databases/postgresql.py @@ -19,7 +19,7 @@ Date, ) from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue -from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError, Mixin_Schema +from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError from data_diff.databases.base import ( MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, @@ -40,7 +40,7 @@ def import_postgresql(): @attrs.define(frozen=False) -class PostgresqlDialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): +class PostgresqlDialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "PostgreSQL" ROUNDS_ON_PREC_LOSS = True SUPPORTS_PRIMARY_KEY = True diff --git a/data_diff/databases/presto.py b/data_diff/databases/presto.py index 6b9c7fb1..b308ac77 100644 --- a/data_diff/databases/presto.py +++ b/data_diff/databases/presto.py @@ -27,7 +27,6 @@ Database, import_helper, ThreadLocalInterpreter, - Mixin_Schema, ) from data_diff.databases.base import ( MD5_HEXDIGITS, @@ -53,7 +52,7 @@ def import_presto(): return prestodb -class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): +class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "Presto" ROUNDS_ON_PREC_LOSS = True TYPE_CLASSES = { diff --git 
a/data_diff/databases/snowflake.py b/data_diff/databases/snowflake.py index 63a7f3e2..746b52e0 100644 --- a/data_diff/databases/snowflake.py +++ b/data_diff/databases/snowflake.py @@ -18,7 +18,6 @@ from data_diff.abcs.mixins import ( AbstractMixin_MD5, AbstractMixin_NormalizeValue, - AbstractMixin_Schema, ) from data_diff.abcs.compiler import Compilable from data_diff.queries.api import table, this, SKIP, code @@ -42,7 +41,7 @@ def import_snowflake(): return snowflake, serialization, default_backend -class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue): +class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "Snowflake" ROUNDS_ON_PREC_LOSS = False TYPE_CLASSES = { @@ -69,9 +68,6 @@ def quote(self, s: str): def to_string(self, s: str): return f"cast({s} as string)" - def table_information(self) -> Compilable: - return table("INFORMATION_SCHEMA", "TABLES") - def set_timezone_to_utc(self) -> str: return "ALTER SESSION SET TIMEZONE = 'UTC'" @@ -100,20 +96,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str: def normalize_boolean(self, value: str, _coltype: Boolean) -> str: return self.to_string(f"{value}::int") - def table_information(self) -> Compilable: - return table("INFORMATION_SCHEMA", "TABLES") - - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - return ( - self.table_information() - .where( - this.TABLE_SCHEMA == table_schema, - this.TABLE_NAME.like(like) if like is not None else SKIP, - this.TABLE_TYPE == "BASE TABLE", - ) - .select(table_name=this.TABLE_NAME) - ) - @attrs.define(frozen=False, init=False, kw_only=True) class Snowflake(Database): diff --git a/data_diff/databases/vertica.py b/data_diff/databases/vertica.py index fcb34a96..e4f86043 100644 --- a/data_diff/databases/vertica.py +++ b/data_diff/databases/vertica.py @@ -27,9 +27,7 @@ Boolean, ColType_UUID, ) -from data_diff.abcs.mixins import AbstractMixin_MD5, 
AbstractMixin_NormalizeValue, AbstractMixin_Schema -from data_diff.abcs.compiler import Compilable -from data_diff.queries.api import table, this, SKIP +from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue @import_helper("vertica") @@ -39,7 +37,7 @@ def import_vertica(): return vertica_python -class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema): +class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue): name = "Vertica" ROUNDS_ON_PREC_LOSS = True @@ -131,19 +129,6 @@ def normalize_uuid(self, value: str, _coltype: ColType_UUID) -> str: def normalize_boolean(self, value: str, _coltype: Boolean) -> str: return self.to_string(f"cast ({value} as int)") - def table_information(self) -> Compilable: - return table("v_catalog", "tables") - - def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable: - return ( - self.table_information() - .where( - this.table_schema == table_schema, - this.table_name.like(like) if like is not None else SKIP, - ) - .select(this.table_name) - ) - @attrs.define(frozen=False, init=False, kw_only=True) class Vertica(ThreadedDatabase): diff --git a/tests/test_database.py b/tests/test_database.py index 3ef06dcc..0280eae1 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -63,44 +63,6 @@ def test_bad_uris(self): self.assertRaises(ValueError, connect, "snowflake://user:pass@foo/bar/TEST1?warehouse=ha&schema=dup") -@test_each_database -class TestSchema(unittest.TestCase): - def test_table_list(self): - name = "tbl_" + random_table_suffix() - db = get_conn(self.db_cls) - tbl = table(db.dialect.parse_table_name(name), schema={"id": int}) - q = db.dialect.list_tables(db.default_schema, name) - assert not db.query(q) - - db.query(tbl.create()) - self.assertEqual(db.query(q, List[str]), [name]) - - db.query(tbl.drop()) - assert not db.query(q) - - def test_type_mapping(self): - name = "tbl_" + random_table_suffix() - db = 
get_conn(self.db_cls) - tbl = table( - db.dialect.parse_table_name(name), - schema={ - "int": int, - "float": float, - "datetime": datetime, - "str": str, - "bool": bool, - }, - ) - q = db.dialect.list_tables(db.default_schema, name) - assert not db.query(q) - - db.query(tbl.create()) - self.assertEqual(db.query(q, List[str]), [name]) - - db.query(tbl.drop()) - assert not db.query(q) - - @test_each_database class TestQueries(unittest.TestCase): def test_current_timestamp(self):