diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml new file mode 100644 index 0000000..44079c0 --- /dev/null +++ b/.gitea/workflows/build.yaml @@ -0,0 +1,30 @@ +name: Build +on: [push] +jobs: + execute-tests: + runs-on: ubuntu-latest + steps: + - name: Set running + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}' + - name: Checkout + uses: actions/checkout@v4 + - name: Set up java + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + - name: Build + id: build + run: ./gradlew build -x test + - name: Publish status + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.build.conclusion }}"}' \ No newline at end of file diff --git a/.gitea/workflows/tests.yaml b/.gitea/workflows/tests.yaml index 1c2d43c..d8e7e55 100644 --- a/.gitea/workflows/tests.yaml +++ b/.gitea/workflows/tests.yaml @@ -1,15 +1,30 @@ -name: Spark Boilerplate Tests -run-name: Spark boilerplate tests -on: [push] +name: Tests +on: [push, pull_request] jobs: - run-tests: + execute-tests: runs-on: ubuntu-latest steps: + - name: Set running + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"running"}' - name: Checkout - uses: actions/checkout@v3 - - name: Set up JDK 17 + uses: actions/checkout@v4 + - name: Set up java uses: actions/setup-java@v3 with: 
java-version: '17' distribution: 'temurin' - - run: ./gradlew test \ No newline at end of file + - name: Run tests + id: tests + run: ./gradlew test + - name: Publish status + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/${{ gitea.workflow }}","status":"${{ steps.tests.conclusion }}"}' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 625dd55..43a3558 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Compiled classes *.class # Gradle files -.gralde +.gradle # IntelliJ IDEA files .idea # Build files diff --git a/README.md b/README.md index bbdaa62..561d32a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +[![Tests](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Tests)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=tests.yaml) +[![Build](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-boilerplate/Build)](https://git.barrelsofdata.com/barrelsofdata/spark-boilerplate/actions?workflow=build.yaml) + # Spark Boilerplate This is a boilerplate project for Apache Spark. 
The related blog post can be found at [https://www.barrelsofdata.com/spark-boilerplate-using-scala](https://www.barrelsofdata.com/spark-boilerplate-using-scala) @@ -15,12 +18,8 @@ From the root of the project execute the below commands ```shell script ./gradlew build ``` -- All combined -```shell script -./gradlew clean test build -``` ## Run ```shell script -spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.jar +spark-submit --master yarn --deploy-mode cluster build/libs/spark-boilerplate-1.0.0.jar ``` \ No newline at end of file diff --git a/build.gradle b/build.gradle deleted file mode 100644 index 694228a..0000000 --- a/build.gradle +++ /dev/null @@ -1,56 +0,0 @@ -plugins { - id "scala" -} - -group projectGroup -version projectVersion - -repositories { - mavenCentral() -} - -dependencies { - compileOnly group: "org.scala-lang", name:"scala-library", version: "${scalaMajorVersion}.${scalaMinorVersion}" - - compileOnly group: "org.apache.spark", name: "spark-core_${scalaMajorVersion}", version: apacheSparkVersion - compileOnly group: "org.apache.spark", name: "spark-sql_${scalaMajorVersion}", version: apacheSparkVersion - - testImplementation group: "org.scalatest", name: "scalatest_${scalaMajorVersion}", version: scalaTestVersion -} - -configurations { - testImplementation.extendsFrom compileOnly -} - -tasks.withType(ScalaCompile).configureEach { - scalaCompileOptions.additionalParameters = ["-release:${JavaVersion.current()}".toString()] -} - -tasks.register("scalaTest", JavaExec) { - dependsOn["testClasses"] - mainClass = "org.scalatest.tools.Runner" - args = ["-R", "build/classes/scala/test", "-o"] - jvmArgs = ["--add-exports=java.base/sun.nio.ch=ALL-UNNAMED"] // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9 - classpath = sourceSets.test.runtimeClasspath -} -test.dependsOn scalaTest - -jar { - manifest { - attributes "Main-Class": mainClass - } - from { - configurations.runtimeClasspath.collect { 
it.isDirectory() ? it : zipTree(it) } - } - archiveFileName.set("${getArchiveBaseName().get()}-${projectVersion}.${getArchiveExtension().get()}") -} - -java { - toolchain { - languageVersion = JavaLanguageVersion.of(targetJVM) - } -} - -clean.doFirst { - delete "logs/" -} diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..d688010 --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,53 @@ +plugins { + scala +} + +project.group = "com.barrelsofdata" +project.version = "1.0.0" + +dependencies { + compileOnly(libs.scala.library) + compileOnly(libs.bundles.spark) + + testImplementation(libs.scala.test) +} + +// https://docs.gradle.org/current/userguide/performance.html +tasks.withType<Test>().configureEach { + maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1) +} + +configurations { + implementation { + resolutionStrategy.failOnVersionConflict() + } + testImplementation { + extendsFrom(configurations.compileOnly.get()) + } +} + +tasks.register<JavaExec>("scalaTest") { + dependsOn("testClasses") + mainClass = "org.scalatest.tools.Runner" + args = listOf("-R", "build/classes/scala/test", "-o") + jvmArgs = listOf("--add-exports=java.base/sun.nio.ch=ALL-UNNAMED") // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9 + classpath = sourceSets.test.get().runtimeClasspath +} + +tasks.withType<Test> { + dependsOn(":scalaTest") +} + +tasks.withType<Jar> { + manifest { + attributes["Main-Class"] = "com.barrelsofdata.sparkexamples.Driver" + } + from (configurations.runtimeClasspath.get().map { if (it.isDirectory()) it else zipTree(it) }) + archiveFileName.set("${archiveBaseName.get()}-${project.version}.${archiveExtension.get()}") +} + +tasks.clean { + doFirst { + delete("logs/") + } +} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties index 7c0c6a1..6ca7031 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,14 +1,7 @@ -apacheSparkVersion=3.3.2 -scalaMajorVersion=2.13 -scalaMinorVersion=10 
-scalaTestVersion=3.2.15 - -mainClass=com.barrelsofdata.sparkexamples.Driver -projectGroup=com.barrelsofdata.sparkexamples -projectVersion=1.0 -targetJVM=17 - +org.gradle.caching=true +org.gradle.configureondemand=true org.gradle.daemon=false -org.gradle.jvmargs=-Xms128m -Xmx256m -XX:+UseSerialGC -Dfile.encoding=UTF-8 +org.gradle.jvmargs=-Xms256m -Xmx2048m -XX:MaxMetaspaceSize=512m -XX:+UseParallelGC -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 +org.gradle.parallel=true org.gradle.warning.mode=all org.gradle.welcome=never \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000..86ca8aa --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,13 @@ +[versions] +apache-spark = "3.5.0" +scala = "2.13.8" +scala-test = "3.2.17" + +[libraries] +scala-library = { module = "org.scala-lang:scala-library", version.ref = "scala" } +scala-test = { module = "org.scalatest:scalatest_2.13", version.ref = "scala-test" } +spark-core = { module = "org.apache.spark:spark-core_2.13", version.ref = "apache-spark" } +spark-sql = { module = "org.apache.spark:spark-sql_2.13", version.ref = "apache-spark" } + +[bundles] +spark = ["spark-core", "spark-sql"] \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index ccebba7..7f93135 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index bdc9a83..3fa8f86 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip networkTimeout=10000 +validateDistributionUrl=true 
zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 79a61d4..1aa94a4 100755 --- a/gradlew +++ b/gradlew @@ -83,10 +83,8 @@ done # This is normally unused # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} -APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -133,10 +131,13 @@ location of your Java installation." fi else JAVACMD=java - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. @@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 
- # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then done fi -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ diff --git a/settings.gradle b/settings.gradle deleted file mode 100644 index d63964b..0000000 --- a/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'spark-boilerplate' diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..658bf90 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,14 @@ +pluginManagement { + repositories { + mavenCentral() + gradlePluginPortal() + } +} + +dependencyResolutionManagement { + repositories { + mavenCentral() + } +} + +rootProject.name = "spark-boilerplate"