Use Gradle Kotlin DSL, version catalogs, and update versions
commit 9d4d60aaa6
parent aa614db625
.gitea/workflows/build.yaml (new file)
@@ -0,0 +1,30 @@
+name: Build
+on: [push]
+jobs:
+  execute-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set running
+        if: always()
+        run: |
+          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
+          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
+          -H 'Content-Type: application/json' \
+          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up java
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Build
+        id: build
+        run: ./gradlew bootJar -x test
+      - name: Publish status
+        if: always()
+        run: |
+          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
+          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
+          -H 'Content-Type: application/json' \
+          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.build.conclusion }}"}'
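Both workflows report to the same status endpoint via curl. As a reference sketch (not part of the commit), the equivalent request in Kotlin; the endpoint, headers, and payload fields are taken from the steps above, and reading the key from a STATUS_PUBLISH_API_KEY environment variable is an assumption for illustration:

```kotlin
import java.net.URI
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse

// Mirrors the workflow's curl step: POST a JSON body with "action" and
// "status" fields, authenticated via the X-API-KEY header.
fun publishStatus(action: String, status: String) {
    val body = """{"action":"$action","status":"$status"}"""
    val request = HttpRequest.newBuilder()
        .uri(URI.create("https://barrelsofdata.com/api/v1/git/action/status/publish"))
        .header("X-API-KEY", System.getenv("STATUS_PUBLISH_API_KEY").orEmpty())
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(body))
        .build()
    val response = HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString())
    println("status publish returned HTTP ${response.statusCode()}")
}
```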
.gitea/workflows/tests.yaml
@@ -1,15 +1,30 @@
-name: Spark Boilerplate Tests
-run-name: Spark boilerplate tests
-on: [push, pull_request]
+name: Tests
+on: [push]
 jobs:
-  run-tests:
+  execute-tests:
     runs-on: ubuntu-latest
     steps:
+      - name: Set running
+        if: always()
+        run: |
+          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
+          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
+          -H 'Content-Type: application/json' \
+          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
       - name: Checkout
-        uses: actions/checkout@v3
-      - name: Set up JDK 17
+        uses: actions/checkout@v4
+      - name: Set up java
         uses: actions/setup-java@v3
         with:
           java-version: '17'
           distribution: 'temurin'
-      - run: ./gradlew test
+      - name: Run tests
+        id: tests
+        run: ./gradlew test
+      - name: Publish status
+        if: always()
+        run: |
+          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
+          -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
+          -H 'Content-Type: application/json' \
+          -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.tests.conclusion }}"}'
.gitignore
@@ -1,7 +1,7 @@
 # Compiled classes
 *.class
 # Gradle files
-.gralde
+.gradle
 # IntelliJ IDEA files
 .idea
 # Build files
README.md
@@ -1,3 +1,6 @@
+[![Tests](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Tests)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=tests.yaml)
+[![Build](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Build)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=build.yaml)
+
 # Spark Boilerplate
 This is a boilerplate project for Apache Spark. The related blog post can be found at [https://www.barrelsofdata.com/spark-boilerplate-using-scala](https://www.barrelsofdata.com/spark-boilerplate-using-scala)
 
@@ -15,12 +18,8 @@ From the root of the project execute the below commands
 ```shell script
 ./gradlew build
 ```
-- All combined
-```shell script
-./gradlew clean test build
-```
 
 ## Run
 ```shell script
-spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.jar
+spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.0.jar
 ```
build.gradle (deleted)
@@ -1,56 +0,0 @@
-plugins {
-    id "scala"
-}
-
-group projectGroup
-version projectVersion
-
-repositories {
-    mavenCentral()
-}
-
-dependencies {
-    compileOnly group: "org.scala-lang", name:"scala-library", version: "${scalaMajorVersion}.${scalaMinorVersion}"
-
-    compileOnly group: "org.apache.spark", name: "spark-core_${scalaMajorVersion}", version: apacheSparkVersion
-    compileOnly group: "org.apache.spark", name: "spark-sql_${scalaMajorVersion}", version: apacheSparkVersion
-
-    testImplementation group: "org.scalatest", name: "scalatest_${scalaMajorVersion}", version: scalaTestVersion
-}
-
-configurations {
-    testImplementation.extendsFrom compileOnly
-}
-
-tasks.withType(ScalaCompile).configureEach {
-    scalaCompileOptions.additionalParameters = ["-release:${JavaVersion.current()}".toString()]
-}
-
-tasks.register("scalaTest", JavaExec) {
-    dependsOn["testClasses"]
-    mainClass = "org.scalatest.tools.Runner"
-    args = ["-R", "build/classes/scala/test", "-o"]
-    jvmArgs = ["--add-exports=java.base/sun.nio.ch=ALL-UNNAMED"] // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
-    classpath = sourceSets.test.runtimeClasspath
-}
-test.dependsOn scalaTest
-
-jar {
-    manifest {
-        attributes "Main-Class": mainClass
-    }
-    from {
-        configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
-    }
-    archiveFileName.set("${getArchiveBaseName().get()}-${projectVersion}.${getArchiveExtension().get()}")
-}
-
-java {
-    toolchain {
-        languageVersion = JavaLanguageVersion.of(targetJVM)
-    }
-}
-
-clean.doFirst {
-    delete "logs/"
-}
build.gradle.kts (new file)
@@ -0,0 +1,53 @@
+plugins {
+    scala
+}
+
+project.group = "com.barrelsofdata"
+project.version = "1.0.0"
+
+dependencies {
+    compileOnly(libs.scala.library)
+    compileOnly(libs.bundles.spark)
+
+    testImplementation(libs.scala.test)
+}
+
+// https://docs.gradle.org/current/userguide/performance.html
+tasks.withType<Test>().configureEach {
+    maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
+}
+
+configurations {
+    implementation {
+        resolutionStrategy.failOnVersionConflict()
+    }
+    testImplementation {
+        extendsFrom(configurations.compileOnly.get())
+    }
+}
+
+tasks.register<JavaExec>("scalaTest") {
+    dependsOn("testClasses")
+    mainClass = "org.scalatest.tools.Runner"
+    args = listOf("-R", "build/classes/scala/test", "-o")
+    jvmArgs = listOf("--add-exports=java.base/sun.nio.ch=ALL-UNNAMED") // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
+    classpath = sourceSets.test.get().runtimeClasspath
+}
+
+tasks.withType<Test> {
+    dependsOn(":scalaTest")
+}
+
+tasks.withType<Jar> {
+    manifest {
+        attributes["Main-Class"] = "com.barrelsofdata.sparkexamples.Driver"
+    }
+    from(configurations.runtimeClasspath.get().map { if (it.isDirectory()) it else zipTree(it) })
+    archiveFileName.set("${archiveBaseName.get()}-${project.version}.${archiveExtension.get()}")
+}
+
+tasks.clean {
+    doFirst {
+        delete("logs/")
+    }
+}
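Note that two blocks from the old build.gradle have no counterpart in the Kotlin rewrite: the ScalaCompile `-release` flag and the Java toolchain pin (the old gradle.properties set targetJVM=17). If either behaviour is still wanted, a rough build.gradle.kts equivalent would look like this (a sketch only, not part of this commit):

```kotlin
// Sketch: Kotlin DSL versions of the two Groovy blocks dropped in the rewrite.
tasks.withType<ScalaCompile>().configureEach {
    // The Groovy build passed a -release flag matching the running JVM
    scalaCompileOptions.additionalParameters = listOf("-release:${JavaVersion.current()}")
}

java {
    toolchain {
        // The old gradle.properties pinned targetJVM=17
        languageVersion = JavaLanguageVersion.of(17)
    }
}
```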
gradle.properties
@@ -1,14 +1,7 @@
-apacheSparkVersion=3.3.2
-scalaMajorVersion=2.13
-scalaMinorVersion=10
-scalaTestVersion=3.2.15
-
-mainClass=com.barrelsofdata.sparkexamples.Driver
-projectGroup=com.barrelsofdata.sparkexamples
-projectVersion=1.0
-targetJVM=17
-
+org.gradle.caching=true
+org.gradle.configureondemand=true
 org.gradle.daemon=false
-org.gradle.jvmargs=-Xms128m -Xmx256m -XX:+UseSerialGC -Dfile.encoding=UTF-8
+org.gradle.jvmargs=-Xms256m -Xmx2048m -XX:MaxMetaspaceSize=512m -XX:+UseParallelGC -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
+org.gradle.parallel=true
 org.gradle.warning.mode=all
 org.gradle.welcome=never
gradle/libs.versions.toml (new file)
@@ -0,0 +1,13 @@
+[versions]
+apache-spark = "3.5.0"
+scala = "2.13.8"
+scala-test = "3.2.17"
+
+[libraries]
+scala-library = { module = "org.scala-lang:scala-library", version.ref = "scala" }
+scala-test = { module = "org.scalatest:scalatest_2.13", version.ref = "scala-test" }
+spark-core = { module = "org.apache.spark:spark-core_2.13", version.ref = "apache-spark" }
+spark-sql = { module = "org.apache.spark:spark-sql_2.13", version.ref = "apache-spark" }
+
+[bundles]
+spark = ["spark-core", "spark-sql"]
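Gradle turns each catalog key into a type-safe accessor: dashes become dots, so `scala-library` surfaces as `libs.scala.library` and the `spark` bundle as `libs.bundles.spark`, which is exactly what build.gradle.kts above consumes. For reference, a hypothetical sketch of the same dependency block written without the catalog, using the coordinates pinned above:

```kotlin
// Sketch: explicit coordinates equivalent to the catalog accessors.
dependencies {
    compileOnly("org.scala-lang:scala-library:2.13.8")       // libs.scala.library
    compileOnly("org.apache.spark:spark-core_2.13:3.5.0")    // libs.bundles.spark
    compileOnly("org.apache.spark:spark-sql_2.13:3.5.0")     //   expands to both modules
    testImplementation("org.scalatest:scalatest_2.13:3.2.17") // libs.scala.test
}
```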
gradle/wrapper/gradle-wrapper.jar (binary file not shown)
gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,7 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
 networkTimeout=10000
+validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
gradlew
@@ -83,10 +83,8 @@ done
 # This is normally unused
 # shellcheck disable=SC2034
 APP_BASE_NAME=${0##*/}
-APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
-
-# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
+APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
 
 # Use the maximum available, or set MAX_FD != -1 to use that value.
 MAX_FD=maximum
@@ -133,18 +131,21 @@ location of your Java installation."
     fi
 else
     JAVACMD=java
-    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+    if ! command -v java >/dev/null 2>&1
+    then
+        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
 
 Please set the JAVA_HOME variable in your environment to match the
 location of your Java installation."
+    fi
 fi
 
 # Increase the maximum file descriptors if we can.
 if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
     case $MAX_FD in #(
       max*)
         # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
-        # shellcheck disable=SC3045
+        # shellcheck disable=SC2039,SC3045
         MAX_FD=$( ulimit -H -n ) ||
             warn "Could not query maximum file descriptor limit"
     esac
@@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
       '' | soft) :;; #(
       *)
         # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
-        # shellcheck disable=SC3045
+        # shellcheck disable=SC2039,SC3045
         ulimit -n "$MAX_FD" ||
             warn "Could not set maximum file descriptor limit to $MAX_FD"
     esac
@@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then
     done
 fi
 
-# Collect all arguments for the java command;
-#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
-#     shell script including quotes and variable substitutions, so put them in
-#     double quotes to make sure that they get re-expanded; and
-#   * put everything else in single quotes, so that it's not re-expanded.
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Collect all arguments for the java command:
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
+#     and any embedded shellness will be escaped.
+#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
+#     treated as '${Hostname}' itself on the command line.
 
 set -- \
         "-Dorg.gradle.appname=$APP_BASE_NAME" \
settings.gradle (deleted)
@@ -1 +0,0 @@
-rootProject.name = 'spark-boilerplate'
settings.gradle.kts (new file)
@@ -0,0 +1,14 @@
+pluginManagement {
+    repositories {
+        mavenCentral()
+        gradlePluginPortal()
+    }
+}
+
+dependencyResolutionManagement {
+    repositories {
+        mavenCentral()
+    }
+}
+
+rootProject.name = "spark-boilerplate"
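With repositories now declared centrally in dependencyResolutionManagement, an optional tightening (not part of this commit) is to reject per-project repository declarations so every module resolves through settings.gradle.kts; a sketch:

```kotlin
// Sketch: fail the build if any build.gradle.kts declares its own repositories.
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        mavenCentral()
    }
}
```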