Use gradle kotlin dsl, catalogs and update versions
Build / execute-tests (push) Failing after 3m15s Details
Tests / execute-tests (push) Successful in 4m38s Details

This commit is contained in:
karthik 2023-10-28 11:10:56 +02:00
parent aa614db625
commit 9d4d60aaa6
13 changed files with 160 additions and 94 deletions

View File

@ -0,0 +1,30 @@
# Build workflow: assembles the project jar on every push and publishes the
# run status to the external status endpoint used by the README badge.
name: Build
on: [push]
jobs:
  execute-tests:
    runs-on: ubuntu-latest
    steps:
      # Report "running" before any real work starts.
      - name: Set running
        if: always()
        run: |
          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
            -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
            -H 'Content-Type: application/json' \
            -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up java
        uses: actions/setup-java@v3
        with:
          java-version: '17'
          distribution: 'temurin'
      # The build script applies only the `scala` plugin, so the archive task is
      # `jar`; `bootJar` is contributed by the Spring Boot plugin, which this
      # project does not apply. Tests are excluded here because they run in the
      # separate Tests workflow.
      - name: Build
        id: build
        run: ./gradlew jar -x test
      # Always publish the final result, including when the build step failed.
      - name: Publish status
        if: always()
        run: |
          curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
            -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
            -H 'Content-Type: application/json' \
            -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.build.conclusion }}"}'

View File

@ -1,15 +1,30 @@
name: Spark Boilerplate Tests
run-name: Spark boilerplate tests
on: [push]
name: Tests
on: [push, pull_request]
jobs:
run-tests:
execute-tests:
runs-on: ubuntu-latest
steps:
- name: Set running
if: always()
run: |
curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
-H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
-H 'Content-Type: application/json' \
-d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}'
- name: Checkout
uses: actions/checkout@v3
- name: Set up JDK 17
uses: actions/checkout@v4
- name: Set up java
uses: actions/setup-java@v3
with:
java-version: '17'
distribution: 'temurin'
- run: ./gradlew test
- name: Run tests
id: tests
run: ./gradlew test
- name: Publish status
if: always()
run: |
curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \
-H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \
-H 'Content-Type: application/json' \
-d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.tests.conclusion }}"}'

2
.gitignore vendored
View File

@ -1,7 +1,7 @@
# Compiled classes
*.class
# Gradle files
.gralde
.gradle
# IntelliJ IDEA files
.idea
# Build files

View File

@ -1,3 +1,6 @@
[![Tests](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Tests)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=tests.yaml)
[![Build](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Build)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=build.yaml)
# Spark Boilerplate
This is a boilerplate project for Apache Spark. The related blog post can be found at [https://www.barrelsofdata.com/spark-boilerplate-using-scala](https://www.barrelsofdata.com/spark-boilerplate-using-scala)
@ -15,12 +18,8 @@ From the root of the project execute the below commands
```shell script
./gradlew build
```
- All combined
```shell script
./gradlew clean test build
```
## Run
```shell script
spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.jar
spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.0.jar
```

View File

@ -1,56 +0,0 @@
plugins {
id "scala"
}
group projectGroup
version projectVersion
repositories {
mavenCentral()
}
dependencies {
compileOnly group: "org.scala-lang", name:"scala-library", version: "${scalaMajorVersion}.${scalaMinorVersion}"
compileOnly group: "org.apache.spark", name: "spark-core_${scalaMajorVersion}", version: apacheSparkVersion
compileOnly group: "org.apache.spark", name: "spark-sql_${scalaMajorVersion}", version: apacheSparkVersion
testImplementation group: "org.scalatest", name: "scalatest_${scalaMajorVersion}", version: scalaTestVersion
}
configurations {
testImplementation.extendsFrom compileOnly
}
tasks.withType(ScalaCompile).configureEach {
scalaCompileOptions.additionalParameters = ["-release:${JavaVersion.current()}".toString()]
}
tasks.register("scalaTest", JavaExec) {
dependsOn["testClasses"]
mainClass = "org.scalatest.tools.Runner"
args = ["-R", "build/classes/scala/test", "-o"]
jvmArgs = ["--add-exports=java.base/sun.nio.ch=ALL-UNNAMED"] // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
classpath = sourceSets.test.runtimeClasspath
}
test.dependsOn scalaTest
jar {
manifest {
attributes "Main-Class": mainClass
}
from {
configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
}
archiveFileName.set("${getArchiveBaseName().get()}-${projectVersion}.${getArchiveExtension().get()}")
}
java {
toolchain {
languageVersion = JavaLanguageVersion.of(targetJVM)
}
}
clean.doFirst {
delete "logs/"
}

53
build.gradle.kts Normal file
View File

@ -0,0 +1,53 @@
// Build script for the spark-boilerplate project (Scala sources, Kotlin DSL).
plugins {
    scala
}

project.group = "com.barrelsofdata"
project.version = "1.0.0"

dependencies {
    // compileOnly keeps Scala and Spark off the runtime classpath, so they are
    // not bundled into the jar assembled below.
    compileOnly(libs.scala.library)
    compileOnly(libs.bundles.spark)
    testImplementation(libs.scala.test)
}

configurations {
    implementation {
        // Fail the build instead of silently picking a version on conflicts.
        resolutionStrategy.failOnVersionConflict()
    }
    testImplementation {
        // Tests need the compileOnly dependencies (Scala, Spark) on their classpath.
        extendsFrom(configurations.compileOnly.get())
    }
}

// Runs the compiled test classes through the ScalaTest runner directly.
tasks.register<JavaExec>("scalaTest") {
    dependsOn("testClasses")
    mainClass = "org.scalatest.tools.Runner"
    args = listOf("-R", "build/classes/scala/test", "-o")
    jvmArgs = listOf("--add-exports=java.base/sun.nio.ch=ALL-UNNAMED") // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9
    classpath = sourceSets.test.get().runtimeClasspath
}

// configureEach keeps task configuration lazy (task configuration avoidance).
// https://docs.gradle.org/current/userguide/performance.html
tasks.withType<Test>().configureEach {
    maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
    // Make the standard test task also trigger the ScalaTest run.
    dependsOn(":scalaTest")
}

// Configured lazily so the runtime classpath is only resolved when a Jar task
// is actually needed by the requested build.
tasks.withType<Jar>().configureEach {
    manifest {
        attributes["Main-Class"] = "com.barrelsofdata.sparkexamples.Driver"
    }
    // Merge the runtime classpath into the archive: directories as-is, jars unpacked.
    from (configurations.runtimeClasspath.get().map { if (it.isDirectory()) it else zipTree(it) })
    archiveFileName.set("${archiveBaseName.get()}-${project.version}.${archiveExtension.get()}")
}

tasks.clean {
    doFirst {
        // Also remove the logs/ directory when cleaning.
        delete("logs/")
    }
}

View File

@ -1,14 +1,7 @@
apacheSparkVersion=3.3.2
scalaMajorVersion=2.13
scalaMinorVersion=10
scalaTestVersion=3.2.15
mainClass=com.barrelsofdata.sparkexamples.Driver
projectGroup=com.barrelsofdata.sparkexamples
projectVersion=1.0
targetJVM=17
org.gradle.caching=true
org.gradle.configureondemand=true
org.gradle.daemon=false
org.gradle.jvmargs=-Xms128m -Xmx256m -XX:+UseSerialGC -Dfile.encoding=UTF-8
org.gradle.jvmargs=-Xms256m -Xmx2048m -XX:MaxMetaspaceSize=512m -XX:+UseParallelGC -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
org.gradle.parallel=true
org.gradle.warning.mode=all
org.gradle.welcome=never

13
gradle/libs.versions.toml Normal file
View File

@ -0,0 +1,13 @@
# Version catalog: single place for dependency coordinates and versions.
# Exposed to build scripts through the generated `libs` accessor.

[versions]
apache-spark = "3.5.0"
scala = "2.13.8"
scala-test = "3.2.17"

[libraries]
# Artifact names carry the Scala binary version suffix (_2.13).
scala-library = { group = "org.scala-lang", name = "scala-library", version.ref = "scala" }
scala-test = { group = "org.scalatest", name = "scalatest_2.13", version.ref = "scala-test" }
spark-core = { group = "org.apache.spark", name = "spark-core_2.13", version.ref = "apache-spark" }
spark-sql = { group = "org.apache.spark", name = "spark-sql_2.13", version.ref = "apache-spark" }

[bundles]
# Spark modules that are always declared together.
spark = [
    "spark-core",
    "spark-sql",
]

Binary file not shown.

View File

@ -1,6 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

29
gradlew vendored
View File

@ -83,10 +83,8 @@ done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
@ -133,10 +131,13 @@ location of your Java installation."
fi
else
JAVACMD=java
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then
done
fi
# Collect all arguments for the java command;
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
# shell script including quotes and variable substitutions, so put them in
# double quotes to make sure that they get re-expanded; and
# * put everything else in single quotes, so that it's not re-expanded.
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \

View File

@ -1 +0,0 @@
rootProject.name = 'spark-boilerplate'

14
settings.gradle.kts Normal file
View File

@ -0,0 +1,14 @@
// Settings script for the build: declares repositories and the root project name.
// Repositories searched when resolving Gradle plugins, in the order listed.
pluginManagement {
repositories {
mavenCentral()
gradlePluginPortal()
}
}
// Repositories for project dependencies, declared once here rather than per build script.
dependencyResolutionManagement {
repositories {
mavenCentral()
}
}
// Name of the root project.
rootProject.name = "spark-boilerplate"