Use gradle kotlin dsl, catalogs and update versions
This commit is contained in:
parent
aa614db625
commit
9d4d60aaa6
30
.gitea/workflows/build.yaml
Normal file
30
.gitea/workflows/build.yaml
Normal file
@ -0,0 +1,30 @@
|
||||
# Build workflow: publishes a "running" status, assembles the project jar
# (tests are skipped here), then publishes the outcome of the build step.
name: Build
on: [push]
jobs:
  execute-tests:
    runs-on: ubuntu-latest
    steps:
      # Report that the workflow has started.
      - name: Set running
        if: always()
        run: |
          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
          -H 'Content-Type: application/json' \
          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up java
        uses: actions/setup-java@v3
        with:
          java-version: '17'
          distribution: 'temurin'
      # The build script applies only the `scala` plugin, so there is no
      # `bootJar` task (that task comes from the Spring Boot plugin). The
      # archive is produced by the standard `jar` task.
      - name: Build
        id: build
        run: ./gradlew jar -x test
      # Always report the result of the build step, even when it failed.
      - name: Publish status
        if: always()
        run: |
          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
          -H 'Content-Type: application/json' \
          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.build.conclusion }}"}'
|
@ -1,15 +1,30 @@
|
||||
name: Spark Boilerplate Tests
|
||||
run-name: Spark boilerplate tests
|
||||
on: [push]
|
||||
name: Tests
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
run-tests:
|
||||
execute-tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Set running
|
||||
if: always()
|
||||
run: |
|
||||
curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
|
||||
-H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Set up JDK 17
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up java
|
||||
uses: actions/setup-java@v3
|
||||
with:
|
||||
java-version: '17'
|
||||
distribution: 'temurin'
|
||||
- run: ./gradlew test
|
||||
- name: Run tests
|
||||
id: tests
|
||||
run: ./gradlew test
|
||||
- name: Publish status
|
||||
if: always()
|
||||
run: |
|
||||
curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
|
||||
-H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.tests.conclusion }}"}'
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,7 +1,7 @@
|
||||
# Compiled classes
|
||||
*.class
|
||||
# Gradle files
|
||||
.gralde
|
||||
.gradle
|
||||
# IntelliJ IDEA files
|
||||
.idea
|
||||
# Build files
|
||||
|
@ -1,3 +1,6 @@
|
||||
[![Tests](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Tests)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=tests.yaml)
|
||||
[![Build](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Build)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=build.yaml)
|
||||
|
||||
# Spark Boilerplate
|
||||
This is a boilerplate project for Apache Spark. The related blog post can be found at [https://www.barrelsofdata.com/spark-boilerplate-using-scala](https://www.barrelsofdata.com/spark-boilerplate-using-scala)
|
||||
|
||||
@ -15,12 +18,8 @@ From the root of the project execute the below commands
|
||||
```shell script
|
||||
./gradlew build
|
||||
```
|
||||
- All combined
|
||||
```shell script
|
||||
./gradlew clean test build
|
||||
```
|
||||
|
||||
## Run
|
||||
```shell script
|
||||
spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.jar
|
||||
spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.0.jar
|
||||
```
|
56
build.gradle
56
build.gradle
@ -1,56 +0,0 @@
|
||||
plugins {
|
||||
id "scala"
|
||||
}
|
||||
|
||||
group projectGroup
|
||||
version projectVersion
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
dependencies {
|
||||
compileOnly group: "org.scala-lang", name:"scala-library", version: "${scalaMajorVersion}.${scalaMinorVersion}"
|
||||
|
||||
compileOnly group: "org.apache.spark", name: "spark-core_${scalaMajorVersion}", version: apacheSparkVersion
|
||||
compileOnly group: "org.apache.spark", name: "spark-sql_${scalaMajorVersion}", version: apacheSparkVersion
|
||||
|
||||
testImplementation group: "org.scalatest", name: "scalatest_${scalaMajorVersion}", version: scalaTestVersion
|
||||
}
|
||||
|
||||
configurations {
|
||||
testImplementation.extendsFrom compileOnly
|
||||
}
|
||||
|
||||
tasks.withType(ScalaCompile).configureEach {
|
||||
scalaCompileOptions.additionalParameters = ["-release:${JavaVersion.current()}".toString()]
|
||||
}
|
||||
|
||||
tasks.register("scalaTest", JavaExec) {
|
||||
dependsOn["testClasses"]
|
||||
mainClass = "org.scalatest.tools.Runner"
|
||||
args = ["-R", "build/classes/scala/test", "-o"]
|
||||
jvmArgs = ["--add-exports=java.base/sun.nio.ch=ALL-UNNAMED"] // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
|
||||
classpath = sourceSets.test.runtimeClasspath
|
||||
}
|
||||
test.dependsOn scalaTest
|
||||
|
||||
jar {
|
||||
manifest {
|
||||
attributes "Main-Class": mainClass
|
||||
}
|
||||
from {
|
||||
configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
|
||||
}
|
||||
archiveFileName.set("${getArchiveBaseName().get()}-${projectVersion}.${getArchiveExtension().get()}")
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion = JavaLanguageVersion.of(targetJVM)
|
||||
}
|
||||
}
|
||||
|
||||
clean.doFirst {
|
||||
delete "logs/"
|
||||
}
|
53
build.gradle.kts
Normal file
53
build.gradle.kts
Normal file
@ -0,0 +1,53 @@
|
||||
plugins {
    scala
}

// Project coordinates.
group = "com.barrelsofdata"
version = "1.0.0"

// All coordinates and versions come from the `libs` version catalog.
dependencies {
    // Compile-time only: these are not added to the runtime classpath.
    compileOnly(libs.scala.library)
    compileOnly(libs.bundles.spark)

    testImplementation(libs.scala.test)
}
|
||||
|
||||
// Run tests in parallel on half of the available cores, but never fewer than one fork.
// See https://docs.gradle.org/current/userguide/performance.html
tasks.withType<Test>().configureEach {
    val halfOfCores = Runtime.getRuntime().availableProcessors() / 2
    maxParallelForks = maxOf(halfOfCores, 1)
}
|
||||
|
||||
// Fail on conflicting versions requested through `implementation`.
configurations.implementation.configure {
    resolutionStrategy.failOnVersionConflict()
}

// Make the compile-only dependencies available to the tests as well.
configurations.testImplementation.configure {
    extendsFrom(configurations.compileOnly.get())
}
|
||||
|
||||
// Launches the ScalaTest runner (org.scalatest.tools.Runner) against the
// compiled test classes, using the test source set's runtime classpath.
tasks.register("scalaTest", JavaExec::class) {
    dependsOn("testClasses")
    classpath = sourceSets["test"].runtimeClasspath
    mainClass.set("org.scalatest.tools.Runner")
    // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
    jvmArgs = listOf("--add-exports=java.base/sun.nio.ch=ALL-UNNAMED")
    args = listOf("-R", "build/classes/scala/test", "-o")
}
|
||||
|
||||
// Every Test task first runs the ScalaTest runner task.
// `configureEach` configures the tasks lazily instead of realising every
// Test task at configuration time.
tasks.withType<Test>().configureEach {
    dependsOn(":scalaTest")
}
|
||||
|
||||
// Packages a runnable jar: sets the entry point in the manifest, merges the
// contents of the runtime classpath into the archive, and names the file
// <baseName>-<version>.<extension>.
tasks.withType<Jar>().configureEach {
    manifest {
        attributes["Main-Class"] = "com.barrelsofdata.sparkexamples.Driver"
    }
    // Passed as a lambda so the runtime classpath is resolved only when the
    // archive is actually built, not while the build is being configured.
    from({
        configurations.runtimeClasspath.get().map { if (it.isDirectory) it else zipTree(it) }
    })
    archiveFileName.set("${archiveBaseName.get()}-${project.version}.${archiveExtension.get()}")
}
|
||||
|
||||
// Also remove the logs/ directory whenever `clean` runs.
tasks.named<Delete>("clean") {
    doFirst {
        delete("logs/")
    }
}
|
@ -1,14 +1,7 @@
|
||||
apacheSparkVersion=3.3.2
|
||||
scalaMajorVersion=2.13
|
||||
scalaMinorVersion=10
|
||||
scalaTestVersion=3.2.15
|
||||
|
||||
mainClass=com.barrelsofdata.sparkexamples.Driver
|
||||
projectGroup=com.barrelsofdata.sparkexamples
|
||||
projectVersion=1.0
|
||||
targetJVM=17
|
||||
|
||||
org.gradle.caching=true
|
||||
org.gradle.configureondemand=true
|
||||
org.gradle.daemon=false
|
||||
org.gradle.jvmargs=-Xms128m -Xmx256m -XX:+UseSerialGC -Dfile.encoding=UTF-8
|
||||
org.gradle.jvmargs=-Xms256m -Xmx2048m -XX:MaxMetaspaceSize=512m -XX:+UseParallelGC -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
|
||||
org.gradle.parallel=true
|
||||
org.gradle.warning.mode=all
|
||||
org.gradle.welcome=never
|
13
gradle/libs.versions.toml
Normal file
13
gradle/libs.versions.toml
Normal file
@ -0,0 +1,13 @@
|
||||
# Version catalog: single place for dependency coordinates and versions.
[versions]
apache-spark = "3.5.0"
scala = "2.13.8"
scala-test = "3.2.17"

[libraries]
scala-library = { group = "org.scala-lang", name = "scala-library", version.ref = "scala" }
scala-test = { group = "org.scalatest", name = "scalatest_2.13", version.ref = "scala-test" }
spark-core = { group = "org.apache.spark", name = "spark-core_2.13", version.ref = "apache-spark" }
spark-sql = { group = "org.apache.spark", name = "spark-sql_2.13", version.ref = "apache-spark" }

[bundles]
# Spark modules that are always used together.
spark = ["spark-core", "spark-sql"]
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
3
gradle/wrapper/gradle-wrapper.properties
vendored
3
gradle/wrapper/gradle-wrapper.properties
vendored
@ -1,6 +1,7 @@
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
|
29
gradlew
vendored
29
gradlew
vendored
@ -83,10 +83,8 @@ done
|
||||
# This is normally unused
|
||||
# shellcheck disable=SC2034
|
||||
APP_BASE_NAME=${0##*/}
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
@ -133,10 +131,13 @@ location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
if ! command -v java >/dev/null 2>&1
|
||||
then
|
||||
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC3045
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC3045
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then
|
||||
done
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command;
|
||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||
# shell script including quotes and variable substitutions, so put them in
|
||||
# double quotes to make sure that they get re-expanded; and
|
||||
# * put everything else in single quotes, so that it's not re-expanded.
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Collect all arguments for the java command:
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||
# and any embedded shellness will be escaped.
|
||||
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||
# treated as '${Hostname}' itself on the command line.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
|
@ -1 +0,0 @@
|
||||
rootProject.name = 'spark-boilerplate'
|
14
settings.gradle.kts
Normal file
14
settings.gradle.kts
Normal file
@ -0,0 +1,14 @@
|
||||
// Repositories used to resolve build plugins.
pluginManagement {
    repositories.mavenCentral()
    repositories.gradlePluginPortal()
}

// Repositories used to resolve project dependencies.
dependencyResolutionManagement {
    repositories.mavenCentral()
}

rootProject.name = "spark-boilerplate"
|
Loading…
x
Reference in New Issue
Block a user