
Remove Spark 2.x #2316


Merged — 8 commits, Jan 17, 2025
18 changes: 9 additions & 9 deletions dist/build.gradle
@@ -12,7 +12,7 @@ base {
archivesName = 'elasticsearch-hadoop'
}

-def sparkVariantIncluded = 'spark20scala211'
+def sparkVariantIncluded = 'spark30scala212'

configurations {
embedded {
@@ -47,17 +47,17 @@ configurations {

BuildPlugin.disableTransitiveDependencies(project, project.configurations.thirdPartyShaded)

def distProjects = [":elasticsearch-hadoop-mr", ":elasticsearch-hadoop-hive",
":elasticsearch-spark-20"]
def distProjects = [":elasticsearch-hadoop-mr", ":elasticsearch-hadoop-hive", ":elasticsearch-spark-30"]

distProjects.each { distProject ->
def configureDistDependency = { Dependency dependency ->
-if (distProject == ":elasticsearch-spark-20") {
+if (distProject == ":elasticsearch-spark-30") {
dependency.capabilities {
requireCapability("org.elasticsearch.spark.sql.variant:$sparkVariantIncluded:$project.version")
}
}
}

dependencies {
// This is only going to pull in each project's regular jar to create the project-wide uberjar.
add('embedded', project(distProject), configureDistDependency)
@@ -86,25 +86,25 @@ dependencies {
}
implementation("org.apache.hive:hive-exec:$hiveVersion")
implementation("org.apache.hive:hive-metastore:$hiveVersion")
implementation("org.apache.spark:spark-core_${project.ext.scala211MajorVersion}:$spark20Version") {
implementation("org.apache.spark:spark-core_${project.ext.scala212MajorVersion}:$spark30Version") {
exclude group: 'javax.servlet'
exclude group: 'org.apache.hadoop'
}
implementation("org.apache.spark:spark-yarn_${project.ext.scala211MajorVersion}:$spark20Version") {
implementation("org.apache.spark:spark-yarn_${project.ext.scala212MajorVersion}:$spark30Version") {
exclude group: 'org.apache.hadoop'
}
implementation("org.apache.spark:spark-sql_${project.ext.scala211MajorVersion}:$spark20Version") {
implementation("org.apache.spark:spark-sql_${project.ext.scala212MajorVersion}:$spark30Version") {
exclude group: 'org.apache.hadoop'
}
implementation("org.apache.spark:spark-streaming_${project.ext.scala211MajorVersion}:$spark20Version") {
implementation("org.apache.spark:spark-streaming_${project.ext.scala212MajorVersion}:$spark30Version") {
exclude group: 'org.apache.hadoop'
}
implementation("org.scala-lang:scala-library:$scala211Version")
implementation("org.scala-lang:scala-reflect:$scala211Version")
implementation(project.ext.hadoopClient)
implementation("org.apache.hadoop:hadoop-common:${project.ext.hadoopVersion}")
implementation("org.apache.hadoop:hadoop-mapreduce-client-core:${project.ext.hadoopVersion}")
compileOnly("org.apache.spark:spark-catalyst_${project.ext.scala211MajorVersion}:$spark20Version")
compileOnly("org.apache.spark:spark-catalyst_${project.ext.scala212MajorVersion}:$spark30Version")
}

// Configure uber jar
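
For context, the requireCapability call in this file is the stock Gradle mechanism for choosing one variant of a multi-variant module. A minimal sketch of the same selection from a consumer build, assuming the capability group used above — the module version is a placeholder, not taken from this PR:

    // Hypothetical consumer build script (version is illustrative).
    dependencies {
        implementation('org.elasticsearch:elasticsearch-spark-30:9.0.0') {
            capabilities {
                // Must match a capability registered by the variant plugin.
                requireCapability('org.elasticsearch.spark.sql.variant:spark30scala212:9.0.0')
            }
        }
    }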
1 change: 0 additions & 1 deletion dist/licenses/spark-core_2.11-2.3.0.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions dist/licenses/spark-core_2.12-3.4.3.jar.sha1
@@ -0,0 +1 @@
+6f7105e792cbb650eca44890f0444720bd6a8204
1 change: 0 additions & 1 deletion dist/licenses/spark-sql_2.11-2.3.0.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions dist/licenses/spark-sql_2.12-3.4.3.jar.sha1
@@ -0,0 +1 @@
+a28ead1aa19899654296a6f8a458f8f207f89a73
1 change: 0 additions & 1 deletion dist/licenses/spark-streaming_2.11-2.3.0.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions dist/licenses/spark-streaming_2.12-3.4.3.jar.sha1
@@ -0,0 +1 @@
+e4a9f76eff1db09c67f855cdf60cd83bbfad69ab
1 change: 0 additions & 1 deletion dist/licenses/spark-yarn_2.11-2.3.0.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions dist/licenses/spark-yarn_2.12-3.4.3.jar.sha1
@@ -0,0 +1 @@
+0c8dd1e45fbc589d2438a889c7fe98f0e9fd77ec
10 changes: 10 additions & 0 deletions docs/src/reference/asciidoc/appendix/breaking.adoc
@@ -8,6 +8,16 @@ For clarity, we always list any breaking changes at the top of the
//NOTE: The notable-breaking-changes tagged regions are re-used in the
//Installation and Upgrade Guide

+[[breaking-changes-90]]
+=== Breaking Changes in 9.0
+
+This section details the breaking changes when upgrading {eh} from 8.x to 9.0.
+
+[[removals-9.0]]
+==== Removal of Spark 2.x
+
+Support for Spark 2.x has been removed in {eh} 9.0. Spark 3.x is now the default supported version.
+
=== Deprecations in 8.18

The following functionality has been deprecated in {eh} 8.18 and will be removed
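
To make the upgrade concrete, the change for a consumer build comes down to a coordinate swap; a hedged sketch with placeholder versions:

    dependencies {
        // Before (artifact removed in 9.0):
        // implementation 'org.elasticsearch:elasticsearch-spark-20_2.11:8.17.0'

        // After: Spark 3.x on Scala 2.12 (use the _2.13 suffix on Spark 3.2+):
        implementation 'org.elasticsearch:elasticsearch-spark-30_2.12:9.0.0'
    }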
4 changes: 2 additions & 2 deletions docs/src/reference/asciidoc/core/intro/download.adoc
@@ -69,8 +69,8 @@ The Spark connector framework is the most sensitive to version incompatibilities
| Spark Version | Scala Version | ES-Hadoop Artifact ID
| 1.0 - 2.x | 2.10 | <unsupported>
| 1.0 - 1.6 | 2.11 | <unsupported>
-| 2.x | 2.11 | elasticsearch-spark-20_2.11
-| 2.x | 2.12 | elasticsearch-spark-20_2.12
+| 2.x | 2.11 | <unsupported>
+| 2.x | 2.12 | <unsupported>
| 3.0+ | 2.12 | elasticsearch-spark-30_2.12
| 3.2+ | 2.13 | elasticsearch-spark-30_2.13
|==========================================================
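
Reading the table: the artifact's Scala suffix must match the Scala version your Spark distribution is built with, and the 2.13 artifact requires Spark 3.2 or later. A small illustrative Gradle fragment (the version is a placeholder):

    // Match the suffix to your Spark distribution's Scala build.
    def scalaSuffix = '2.12' // use '2.13' only on Spark 3.2+
    dependencies {
        implementation "org.elasticsearch:elasticsearch-spark-30_${scalaSuffix}:9.0.0"
    }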
4 changes: 0 additions & 4 deletions settings.gradle
@@ -21,10 +21,6 @@ include 'spark-core'
project(":spark-core").projectDir = new File(settingsDir, "spark/core")
project(":spark-core").name = "elasticsearch-spark"

-include 'sql-20'
-project(":sql-20").projectDir = new File(settingsDir, "spark/sql-20")
-project(":sql-20").name = "elasticsearch-spark-20"

include 'sql-30'
project(":sql-30").projectDir = new File(settingsDir, "spark/sql-30")
project(":sql-30").name = "elasticsearch-spark-30"
40 changes: 2 additions & 38 deletions spark/core/build.gradle
@@ -10,12 +10,10 @@ apply plugin: 'spark.variants'
sparkVariants {
capabilityGroup 'org.elasticsearch.spark.variant'

-// Changing the formatting of these lines could break .buildkite/pipeline.py, it uses regex to parse the `spark20scala212` part
+// Changing the formatting of these lines could break .buildkite/pipeline.py, it uses regex to parse the `spark30scala212` part
Review thread on this line:

Member: We should check in with delivery about this line to see if anything needs to be updated in CI for this change.

Contributor (author): Sure, I also found this old PR of yours that removes the old Spark from release manager.

(A hypothetical Groovy sketch of the parsing this thread refers to appears just after this hunk.)

// We should maybe move these to a separate config file that can be read from both this file and the pipeline script in the future if it creates issues
-setCoreDefaultVariant "spark20scala212", spark24Version, scala212Version
+setCoreDefaultVariant "spark30scala212", spark30Version, scala212Version
addCoreFeatureVariant "spark30scala213", spark30Version, scala213Version
-addCoreFeatureVariant "spark30scala212", spark30Version, scala212Version
-addCoreFeatureVariant "spark20scala211", spark24Version, scala211Version

all { SparkVariantPlugin.SparkVariant variant ->

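As the review thread above notes, .buildkite/pipeline.py scrapes variant names out of those declaration lines with a regex, which is why their formatting is load-bearing. The script's actual pattern is not shown in this PR; a hypothetical Groovy equivalent of that parsing:

    // Hypothetical re-creation of the parsing .buildkite/pipeline.py relies on:
    // extract variant names such as "spark30scala212" from the declarations above.
    def pattern = ~/(?:setCoreDefaultVariant|addCoreFeatureVariant)\s+"(spark\d+scala\d+)"/
    def line = 'setCoreDefaultVariant "spark30scala212", spark30Version, scala212Version'
    def matcher = pattern.matcher(line)
    assert matcher.find() && matcher.group(1) == 'spark30scala212'
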
@@ -44,7 +42,6 @@ sparkVariants {
add(variant.configuration('api'), "org.scala-lang:scala-library:${variant.scalaVersion}")
add(variant.configuration('api'), "org.scala-lang:scala-reflect:${variant.scalaVersion}")
add(variant.configuration('api'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:${variant.sparkVersion}") {
-exclude group: 'javax.servlet'
exclude group: 'org.apache.hadoop'
}

@@ -59,29 +56,11 @@
add(variant.configuration('test', 'implementation'), project(":test:shared"))
add(variant.configuration('test', 'implementation'), "com.esotericsoftware:kryo:4.0.2")
add(variant.configuration('test', 'implementation'), "org.apache.spark:spark-core_${variant.scalaMajorVersion}:${variant.sparkVersion}") {
-exclude group: 'javax.servlet'
exclude group: 'org.apache.hadoop'
}

add(variant.configuration('itest', 'implementation'), project(":test:shared"))
add(variant.configuration('test', 'implementation'), "org.elasticsearch:securemock:1.2")

-if (variant.scalaMajorVersion == '2.10') {
-add(variant.configuration('implementation'), "org.apache.spark:spark-unsafe_${variant.scalaMajorVersion}:${variant.sparkVersion}")
-add(variant.configuration('implementation'), "org.apache.avro:avro:1.7.7")
-add(variant.configuration('implementation'), "log4j:log4j:1.2.17")
-add(variant.configuration('implementation'), "com.google.code.findbugs:jsr305:2.0.1")
-add(variant.configuration('implementation'), "org.json4s:json4s-ast_2.10:3.2.10")
-add(variant.configuration('implementation'), "com.esotericsoftware.kryo:kryo:2.21")
-add(variant.configuration('compileOnly'), "org.apache.hadoop:hadoop-annotations:${project.ext.hadoopVersion}")
-add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-core-asl:${project.ext.jacksonVersion}")
-add(variant.configuration('compileOnly'), "org.codehaus.jackson:jackson-mapper-asl:${project.ext.jacksonVersion}")
-add(variant.configuration('compileOnly'), "org.codehaus.woodstox:stax2-api:3.1.4")
-if (variant.sparkVersion == spark22Version) {
-add(variant.configuration('compileOnly'), "org.apache.spark:spark-tags_${variant.scalaMajorVersion}:${variant.sparkVersion}")
-}
-}

add(variant.configuration('additionalSources'), project(":elasticsearch-hadoop-mr"))
add(variant.configuration('javadocSources'), project(":elasticsearch-hadoop-mr"))
}
@@ -128,21 +107,6 @@
}
}

-// deal with the messy conflicts out there
-// Ignore the scalaCompilerPlugin configurations since it is immediately resolved to configure the scala compiler tasks
-configurations.matching{ it.name.contains('CompilerPlugin') == false && (it.name.contains("spark30") || it.name.contains("Spark30")) == false}.all { Configuration conf ->
-conf.resolutionStrategy {
-eachDependency { details ->
-// change all javax.servlet artifacts to the one used by Spark otherwise these will lead to
-// SecurityException (signer information wrong)
-if (details.requested.name.contains("servlet") && !details.requested.name.contains("guice")) {
-details.useTarget group: "org.eclipse.jetty.orbit", name: "javax.servlet", version: "3.0.0.v201112011016"
-}
-}
-}
-conf.exclude group: "org.mortbay.jetty"
-}

if (JavaVersion.current() >= JavaVersion.VERSION_17) {
tasks.withType(Test) { Test task ->
if (task.getName().startsWith("test"))
1 change: 0 additions & 1 deletion spark/core/licenses/spark-core_2.12-2.4.4.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions spark/core/licenses/spark-core_2.12-3.4.3.jar.sha1
@@ -0,0 +1 @@
+6f7105e792cbb650eca44890f0444720bd6a8204