Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 93c128b

Browse files
author
Sergey Vasilyev
committed
Remove unused code: schema fetching via dialects
The schemas are currently fetched via `Database.select_table_schema()` & `Database.query_table_schema()`, so this dialect-level code is unused. It is also questionable whether schema fetching should be done via dialects at all: some databases, such as Databricks, can provide an API (e.g. an HTTP API) to fetch the database metadata. So it belongs to the database, not to dialects.
1 parent 1b99100 commit 93c128b

File tree

12 files changed

+10
-157
lines changed

12 files changed

+10
-157
lines changed

data_diff/abcs/mixins.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -122,25 +122,6 @@ def md5_as_int(self, s: str) -> str:
122122
"Provide SQL for computing md5 and returning an int"
123123

124124

125-
@attrs.define(frozen=False)
126-
class AbstractMixin_Schema(AbstractMixin):
127-
"""Methods for querying the database schema
128-
129-
TODO: Move AbstractDatabase.query_table_schema() and friends over here
130-
"""
131-
132-
def table_information(self) -> Compilable:
133-
"Query to return a table of schema information about existing tables"
134-
raise NotImplementedError()
135-
136-
@abstractmethod
137-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
138-
"""Query to select the list of tables in the schema. (query return type: table[str])
139-
140-
If 'like' is specified, the value is applied to the table name, using the 'like' operator.
141-
"""
142-
143-
144125
@attrs.define(frozen=False)
145126
class AbstractMixin_OptimizerHints(AbstractMixin):
146127
@abstractmethod

data_diff/databases/base.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
)
7676
from data_diff.abcs.mixins import Compilable
7777
from data_diff.abcs.mixins import (
78-
AbstractMixin_Schema,
7978
AbstractMixin_NormalizeValue,
8079
AbstractMixin_OptimizerHints,
8180
)
@@ -199,23 +198,6 @@ def apply_query(callback: Callable[[str], Any], sql_code: Union[str, ThreadLocal
199198
return callback(sql_code)
200199

201200

202-
@attrs.define(frozen=False)
203-
class Mixin_Schema(AbstractMixin_Schema):
204-
def table_information(self) -> Compilable:
205-
return table("information_schema", "tables")
206-
207-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
208-
return (
209-
self.table_information()
210-
.where(
211-
this.table_schema == table_schema,
212-
this.table_name.like(like) if like is not None else SKIP,
213-
this.table_type == "BASE TABLE",
214-
)
215-
.select(this.table_name)
216-
)
217-
218-
219201
@attrs.define(frozen=False)
220202
class Mixin_OptimizerHints(AbstractMixin_OptimizerHints):
221203
def optimizer_hints(self, hints: str) -> str:
@@ -1019,10 +1001,6 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
10191001
assert col_name in col_dict
10201002
col_dict[col_name] = String_VaryingAlphanum()
10211003

1022-
# @lru_cache()
1023-
# def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:
1024-
# return self.query_table_schema(path)
1025-
10261004
def _normalize_table_path(self, path: DbPath) -> DbPath:
10271005
if len(path) == 1:
10281006
return self.default_schema, path[0]
@@ -1056,9 +1034,6 @@ def close(self):
10561034
self.is_closed = True
10571035
return super().close()
10581036

1059-
def list_tables(self, tables_like, schema=None):
1060-
return self.query(self.dialect.list_tables(schema or self.default_schema, tables_like))
1061-
10621037
@property
10631038
@abstractmethod
10641039
def dialect(self) -> BaseDialect:

data_diff/databases/bigquery.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from data_diff.abcs.mixins import (
2424
AbstractMixin_MD5,
2525
AbstractMixin_NormalizeValue,
26-
AbstractMixin_Schema,
2726
)
2827
from data_diff.abcs.compiler import Compilable
2928
from data_diff.queries.api import this, table, SKIP, code
@@ -62,7 +61,7 @@ def import_bigquery_service_account_impersonation():
6261

6362

6463
@attrs.define(frozen=False)
65-
class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
64+
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
6665
name = "BigQuery"
6766
ROUNDS_ON_PREC_LOSS = False # Technically BigQuery doesn't allow implicit rounding or truncation
6867
TYPE_CLASSES = {
@@ -183,17 +182,6 @@ def normalize_struct(self, value: str, _coltype: Struct) -> str:
183182
# match on both sides: i.e. have properly ordered keys, same spacing, same quotes, etc.
184183
return f"to_json_string({value})"
185184

186-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
187-
return (
188-
table(table_schema, "INFORMATION_SCHEMA", "TABLES")
189-
.where(
190-
this.table_schema == table_schema,
191-
this.table_name.like(like) if like is not None else SKIP,
192-
this.table_type == "BASE TABLE",
193-
)
194-
.select(this.table_name)
195-
)
196-
197185

198186
@attrs.define(frozen=False, init=False, kw_only=True)
199187
class BigQuery(Database):

data_diff/databases/duckdb.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
TIMESTAMP_PRECISION_POS,
3131
CHECKSUM_OFFSET,
3232
)
33-
from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, Mixin_Schema
34-
from data_diff.queries.ast_classes import ITable
35-
from data_diff.queries.api import code
33+
from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS
3634

3735

3836
@import_helper("duckdb")
@@ -43,7 +41,7 @@ def import_duckdb():
4341

4442

4543
@attrs.define(frozen=False)
46-
class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
44+
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
4745
name = "DuckDB"
4846
ROUNDS_ON_PREC_LOSS = False
4947
SUPPORTS_PRIMARY_KEY = True

data_diff/databases/mssql.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
ConnectError,
1414
BaseDialect,
1515
)
16-
from data_diff.databases.base import Mixin_Schema
1716
from data_diff.abcs.database_types import (
1817
JSON,
1918
NumericType,
@@ -40,7 +39,6 @@ def import_mssql():
4039
@attrs.define(frozen=False)
4140
class Dialect(
4241
BaseDialect,
43-
Mixin_Schema,
4442
Mixin_OptimizerHints,
4543
AbstractMixin_MD5,
4644
AbstractMixin_NormalizeValue,

data_diff/databases/mysql.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
CHECKSUM_HEXDIGITS,
3232
TIMESTAMP_PRECISION_POS,
3333
CHECKSUM_OFFSET,
34-
Mixin_Schema,
3534
)
3635

3736

@@ -45,7 +44,6 @@ def import_mysql():
4544
@attrs.define(frozen=False)
4645
class Dialect(
4746
BaseDialect,
48-
Mixin_Schema,
4947
Mixin_OptimizerHints,
5048
AbstractMixin_MD5,
5149
AbstractMixin_NormalizeValue,

data_diff/databases/oracle.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
TimestampTZ,
1717
FractionalType,
1818
)
19-
from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema
20-
from data_diff.abcs.compiler import Compilable
21-
from data_diff.queries.api import this, table, SKIP
19+
from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue
2220
from data_diff.databases.base import (
2321
BaseDialect,
2422
Mixin_OptimizerHints,
@@ -46,7 +44,6 @@ def import_oracle():
4644
class Dialect(
4745
BaseDialect,
4846
Mixin_OptimizerHints,
49-
AbstractMixin_Schema,
5047
AbstractMixin_MD5,
5148
AbstractMixin_NormalizeValue,
5249
):
@@ -162,16 +159,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
162159
format_str += "0." + "9" * (coltype.precision - 1) + "0"
163160
return f"to_char({value}, '{format_str}')"
164161

165-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
166-
return (
167-
table("ALL_TABLES")
168-
.where(
169-
this.OWNER == table_schema,
170-
this.TABLE_NAME.like(like) if like is not None else SKIP,
171-
)
172-
.select(table_name=this.TABLE_NAME)
173-
)
174-
175162

176163
@attrs.define(frozen=False, init=False, kw_only=True)
177164
class Oracle(ThreadedDatabase):

data_diff/databases/postgresql.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
Date,
2020
)
2121
from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue
22-
from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError, Mixin_Schema
22+
from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError
2323
from data_diff.databases.base import (
2424
MD5_HEXDIGITS,
2525
CHECKSUM_HEXDIGITS,
@@ -40,7 +40,7 @@ def import_postgresql():
4040

4141

4242
@attrs.define(frozen=False)
43-
class PostgresqlDialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
43+
class PostgresqlDialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
4444
name = "PostgreSQL"
4545
ROUNDS_ON_PREC_LOSS = True
4646
SUPPORTS_PRIMARY_KEY = True

data_diff/databases/presto.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
Database,
2828
import_helper,
2929
ThreadLocalInterpreter,
30-
Mixin_Schema,
3130
)
3231
from data_diff.databases.base import (
3332
MD5_HEXDIGITS,
@@ -53,7 +52,7 @@ def import_presto():
5352
return prestodb
5453

5554

56-
class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
55+
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
5756
name = "Presto"
5857
ROUNDS_ON_PREC_LOSS = True
5958
TYPE_CLASSES = {

data_diff/databases/snowflake.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from data_diff.abcs.mixins import (
1919
AbstractMixin_MD5,
2020
AbstractMixin_NormalizeValue,
21-
AbstractMixin_Schema,
2221
)
2322
from data_diff.abcs.compiler import Compilable
2423
from data_diff.queries.api import table, this, SKIP, code
@@ -42,7 +41,7 @@ def import_snowflake():
4241
return snowflake, serialization, default_backend
4342

4443

45-
class Dialect(BaseDialect, AbstractMixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
44+
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
4645
name = "Snowflake"
4746
ROUNDS_ON_PREC_LOSS = False
4847
TYPE_CLASSES = {
@@ -69,9 +68,6 @@ def quote(self, s: str):
6968
def to_string(self, s: str):
7069
return f"cast({s} as string)"
7170

72-
def table_information(self) -> Compilable:
73-
return table("INFORMATION_SCHEMA", "TABLES")
74-
7571
def set_timezone_to_utc(self) -> str:
7672
return "ALTER SESSION SET TIMEZONE = 'UTC'"
7773

@@ -100,20 +96,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
10096
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
10197
return self.to_string(f"{value}::int")
10298

103-
def table_information(self) -> Compilable:
104-
return table("INFORMATION_SCHEMA", "TABLES")
105-
106-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
107-
return (
108-
self.table_information()
109-
.where(
110-
this.TABLE_SCHEMA == table_schema,
111-
this.TABLE_NAME.like(like) if like is not None else SKIP,
112-
this.TABLE_TYPE == "BASE TABLE",
113-
)
114-
.select(table_name=this.TABLE_NAME)
115-
)
116-
11799

118100
@attrs.define(frozen=False, init=False, kw_only=True)
119101
class Snowflake(Database):

data_diff/databases/vertica.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@
2727
Boolean,
2828
ColType_UUID,
2929
)
30-
from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema
31-
from data_diff.abcs.compiler import Compilable
32-
from data_diff.queries.api import table, this, SKIP
30+
from data_diff.abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue
3331

3432

3533
@import_helper("vertica")
@@ -39,7 +37,7 @@ def import_vertica():
3937
return vertica_python
4038

4139

42-
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_Schema):
40+
class Dialect(BaseDialect, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
4341
name = "Vertica"
4442
ROUNDS_ON_PREC_LOSS = True
4543

@@ -131,19 +129,6 @@ def normalize_uuid(self, value: str, _coltype: ColType_UUID) -> str:
131129
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
132130
return self.to_string(f"cast ({value} as int)")
133131

134-
def table_information(self) -> Compilable:
135-
return table("v_catalog", "tables")
136-
137-
def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
138-
return (
139-
self.table_information()
140-
.where(
141-
this.table_schema == table_schema,
142-
this.table_name.like(like) if like is not None else SKIP,
143-
)
144-
.select(this.table_name)
145-
)
146-
147132

148133
@attrs.define(frozen=False, init=False, kw_only=True)
149134
class Vertica(ThreadedDatabase):

tests/test_database.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -63,44 +63,6 @@ def test_bad_uris(self):
6363
self.assertRaises(ValueError, connect, "snowflake://user:pass@foo/bar/TEST1?warehouse=ha&schema=dup")
6464

6565

66-
@test_each_database
67-
class TestSchema(unittest.TestCase):
68-
def test_table_list(self):
69-
name = "tbl_" + random_table_suffix()
70-
db = get_conn(self.db_cls)
71-
tbl = table(db.dialect.parse_table_name(name), schema={"id": int})
72-
q = db.dialect.list_tables(db.default_schema, name)
73-
assert not db.query(q)
74-
75-
db.query(tbl.create())
76-
self.assertEqual(db.query(q, List[str]), [name])
77-
78-
db.query(tbl.drop())
79-
assert not db.query(q)
80-
81-
def test_type_mapping(self):
82-
name = "tbl_" + random_table_suffix()
83-
db = get_conn(self.db_cls)
84-
tbl = table(
85-
db.dialect.parse_table_name(name),
86-
schema={
87-
"int": int,
88-
"float": float,
89-
"datetime": datetime,
90-
"str": str,
91-
"bool": bool,
92-
},
93-
)
94-
q = db.dialect.list_tables(db.default_schema, name)
95-
assert not db.query(q)
96-
97-
db.query(tbl.create())
98-
self.assertEqual(db.query(q, List[str]), [name])
99-
100-
db.query(tbl.drop())
101-
assert not db.query(q)
102-
103-
10466
@test_each_database
10567
class TestQueries(unittest.TestCase):
10668
def test_current_timestamp(self):

0 commit comments

Comments
 (0)