From b9bfa28aac8a1f39cc6b5395f054c9c7ec08e511 Mon Sep 17 00:00:00 2001 From: lbruand Date: Sat, 27 Feb 2021 16:34:45 +0100 Subject: [PATCH 1/2] none12 compression does not exist - none was meant --- .../sparkbyexamples/spark/dataframe/examples/ReadORCFile.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/sparkbyexamples/spark/dataframe/examples/ReadORCFile.scala b/src/main/scala/com/sparkbyexamples/spark/dataframe/examples/ReadORCFile.scala index da94cc9..946c75c 100644 --- a/src/main/scala/com/sparkbyexamples/spark/dataframe/examples/ReadORCFile.scala +++ b/src/main/scala/com/sparkbyexamples/spark/dataframe/examples/ReadORCFile.scala @@ -21,7 +21,7 @@ object ReadORCFile extends App{ .orc("/tmp/orc/data.orc") df.write.mode("overwrite") - .option("compression","none12") + .option("compression","none") .orc("/tmp/orc/data-nocomp.orc") df.write.mode("overwrite") From d925138f2b4f9171ae31701980478dc755711fab Mon Sep 17 00:00:00 2001 From: lbruand Date: Sat, 27 Feb 2021 17:06:03 +0100 Subject: [PATCH 2/2] Improved console output - showing the AQE status --- .../spark30/{ADQExample.scala => AQEExample.scala} | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) rename src/main/scala/com/sparkbyexamples/spark/spark30/{ADQExample.scala => AQEExample.scala} (72%) diff --git a/src/main/scala/com/sparkbyexamples/spark/spark30/ADQExample.scala b/src/main/scala/com/sparkbyexamples/spark/spark30/AQEExample.scala similarity index 72% rename from src/main/scala/com/sparkbyexamples/spark/spark30/ADQExample.scala rename to src/main/scala/com/sparkbyexamples/spark/spark30/AQEExample.scala index 44910c2..d89670b 100644 --- a/src/main/scala/com/sparkbyexamples/spark/spark30/ADQExample.scala +++ b/src/main/scala/com/sparkbyexamples/spark/spark30/AQEExample.scala @@ -2,7 +2,7 @@ package com.sparkbyexamples.spark.spark30 import org.apache.spark.sql.SparkSession -object ADQExample extends App{ +object AQEExample extends App{ val spark: SparkSession = SparkSession.builder() .master("local[5]") @@ -25,12 +25,11 @@ object ADQExample extends App{ val df = simpleData.toDF("employee_name","department","state","salary","age","bonus") val df1=df.groupBy("department").count() - println(df1.rdd.getNumPartitions) - - spark.conf.set("spark.sql.adaptive.enabled",200) - val df2=df.groupBy("department").count() - println(df2.rdd.getNumPartitions) + println(s"Number of partitions in RDD (AQE=[${spark.conf.get("spark.sql.adaptive.enabled")}]) = [${df1.rdd.getNumPartitions}]") + spark.conf.set("spark.sql.adaptive.enabled", true) + val df2=df.groupBy("department").count() + println(s"Number of partitions in RDD (AQE=[${spark.conf.get("spark.sql.adaptive.enabled")}]) = [${df2.rdd.getNumPartitions}]") }