@@ -213,6 +213,7 @@ def _validate_args(
213
213
mode : Literal ["append" , "overwrite" , "overwrite_partitions" ],
214
214
partition_cols : list [str ] | None ,
215
215
merge_cols : list [str ] | None ,
216
+ merge_condition : Literal ["update" , "ignore" ],
216
217
) -> None :
217
218
if df .empty is True :
218
219
raise exceptions .EmptyDataFrame ("DataFrame cannot be empty." )
@@ -232,6 +233,11 @@ def _validate_args(
232
233
"When mode is 'overwrite_partitions' merge_cols must not be specified."
233
234
)
234
235
236
+ if merge_cols and merge_condition not in ["update" , "ignore" ]:
237
+ raise exceptions .InvalidArgumentValue (
238
+ f"Invalid merge_condition: { merge_condition } . Valid values: ['update', 'ignore']"
239
+ )
240
+
235
241
236
242
@apply_configs
237
243
@_utils .validate_distributed_kwargs (
@@ -246,6 +252,7 @@ def to_iceberg(
246
252
table_location : str | None = None ,
247
253
partition_cols : list [str ] | None = None ,
248
254
merge_cols : list [str ] | None = None ,
255
+ merge_condition : Literal ["update" , "ignore" ] = "update" ,
249
256
keep_files : bool = True ,
250
257
data_source : str | None = None ,
251
258
s3_output : str | None = None ,
@@ -292,6 +299,8 @@ def to_iceberg(
292
299
List of column names that will be used for conditional inserts and updates.
293
300
294
301
https://docs.aws.amazon.com/athena/latest/ug/merge-into-statement.html
302
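+ merge_condition : str, optional
303
+ Action applied to rows that match on ``merge_cols`` in the MERGE INTO statement. Valid values: ['update', 'ignore']. 'update' (default) overwrites matched rows with the source values; 'ignore' leaves matched rows unchanged and only inserts unmatched rows. Only used when ``merge_cols`` is specified.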
295
304
keep_files : bool
296
305
Whether staging files produced by Athena are retained. 'True' by default.
297
306
data_source : str, optional
@@ -376,6 +385,7 @@ def to_iceberg(
376
385
mode = mode ,
377
386
partition_cols = partition_cols ,
378
387
merge_cols = merge_cols ,
388
+ merge_condition = merge_condition ,
379
389
)
380
390
381
391
glue_table_settings = cast (
@@ -497,12 +507,16 @@ def to_iceberg(
497
507
# Insert or merge into Iceberg table
498
508
sql_statement : str
499
509
if merge_cols :
510
+ if merge_condition == "update" :
511
+ match_condition = f"""WHEN MATCHED THEN
512
+ UPDATE SET { ', ' .join ([f'"{ x } " = source."{ x } "' for x in df .columns ])} """
513
+ else :
514
+ match_condition = ""
500
515
sql_statement = f"""
501
516
MERGE INTO "{ database } "."{ table } " target
502
517
USING "{ database } "."{ temp_table } " source
503
518
ON { ' AND ' .join ([f'target."{ x } " = source."{ x } "' for x in merge_cols ])}
504
- WHEN MATCHED THEN
505
- UPDATE SET { ', ' .join ([f'"{ x } " = source."{ x } "' for x in df .columns ])}
519
+ { match_condition }
506
520
WHEN NOT MATCHED THEN
507
521
INSERT ({ ', ' .join ([f'"{ x } "' for x in df .columns ])} )
508
522
VALUES ({ ', ' .join ([f'source."{ x } "' for x in df .columns ])} )
0 commit comments