diff --git a/.gitea/workflows/workflow.yaml b/.gitea/workflows/workflow.yaml new file mode 100644 index 0000000..2c3330e --- /dev/null +++ b/.gitea/workflows/workflow.yaml @@ -0,0 +1,72 @@ +name: Tests +on: [push] +jobs: + reset-status: + runs-on: ubuntu-latest + steps: + - name: Reset test status + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/tests","status":"starting"}' + - name: Reset build status + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/build","status":"waiting for test"}' + tests: + needs: reset-status + runs-on: ubuntu-latest + steps: + - name: Set test running + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/tests","status":"running"}' + - name: Checkout + uses: actions/checkout@v4 + - name: Set up java + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + - name: Run tests + id: tests + run: ./gradlew test + - name: Publish test status + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/tests","status":"${{ steps.tests.conclusion }}"}' + build: + needs: tests + runs-on: ubuntu-latest + steps: + - name: Set build running + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/build","status":"running"}' + - name: Checkout + uses: actions/checkout@v4 + - name: Set up java + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + - name: Build + id: build + run: ./gradlew build -x test + - name: Publish build status + if: always() + run: | + curl -v -X POST https://barrelsofdata.com/api/v1/git/action/status/publish \ + -H 'X-API-KEY: ${{ secrets.STATUS_PUBLISH_API_KEY }}' \ + -H 'Content-Type: application/json' \ + -d '{"action":"${{ gitea.repository }}/action/build","status":"${{ steps.build.conclusion }}"}' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 625dd55..43a3558 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Compiled classes *.class # Gradle files -.gralde +.gradle # IntelliJ IDEA files .idea # Build files diff --git a/README.md b/README.md index 723af0e..11aa4dd 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ +[![Tests](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-structured-streaming-wordcount/tests)](https://git.barrelsofdata.com/barrelsofdata/spark-structured-streaming-wordcount/actions?workflow=workflow.yaml) +[![Build](https://barrelsofdata.com/api/v1/git/action/status/fetch/barrelsofdata/spark-structured-streaming-wordcount/build)](https://git.barrelsofdata.com/barrelsofdata/spark-structured-streaming-wordcount/actions?workflow=workflow.yaml) + # Spark Structured Streaming Word Count -This is a project detailing how to write a streaming word count program in Apache Spark using Structured Streaming. The related blog post can be found at [https://www.barrelsofdata.com/spark-structured-streaming-word-count](https://www.barrelsofdata.com/spark-structured-streaming-word-count) +This is a project detailing how to write a streaming word count program in Apache Spark using Structured Streaming. The related blog post can be found at [https://barrelsofdata.com/spark-structured-streaming-word-count](https://barrelsofdata.com/spark-structured-streaming-word-count) ## Build instructions From the root of the project execute the below commands @@ -13,23 +16,19 @@ From the root of the project execute the below commands ``` - To build jar ```shell script -./gradlew shadowJar -``` -- All combined -```shell script -./gradlew clean test shadowJar +./gradlew build ``` ## Run -Ensure your local hadoop cluster is running ([hadoop cluster tutorial](https://www.barrelsofdata.com/apache-hadoop-pseudo-distributed-mode)) and start two kafka brokers ([kafka tutorial](https://www.barrelsofdata.com/apache-kafka-setup)). +Ensure your local hadoop cluster is running ([hadoop cluster tutorial](https://barrelsofdata.com/apache-hadoop-pseudo-distributed-mode)) and start two kafka brokers ([kafka tutorial](https://barrelsofdata.com/apache-kafka-setup)). - Create kafka topic ```shell script kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 2 --partitions 2 --topic streaming-data ``` - Start streaming job ```shell script -spark-submit --master yarn --deploy-mode cluster build/libs/spark-structured-streaming-wordcount-1.0.jar -Example: spark-submit --master yarn --deploy-mode client build/libs/spark-structured-streaming-wordcount-1.0.jar localhost:9092 streaming-data +spark-submit --master yarn --deploy-mode cluster build/libs/spark-structured-streaming-wordcount-1.0.0.jar +Example: spark-submit --master yarn --deploy-mode client build/libs/spark-structured-streaming-wordcount-1.0.0.jar localhost:9092 streaming-data ``` - You can feed simulated data to the kafka topic - Open new terminal and run the shell script located at src/test/resources/dataProducer.sh diff --git a/build.gradle b/build.gradle deleted file mode 100644 index bcbadff..0000000 --- a/build.gradle +++ /dev/null @@ -1,46 +0,0 @@ -plugins { - id "scala" - id "com.github.johnrengelman.shadow" version "${shadowJarPluginVersion}" -} - -group "${projectGroup}" -version "${projectVersion}" - -repositories { - mavenCentral() -} - -dependencies { - compileOnly group: "org.scala-lang", name:"scala-library", version: "${scalaMajorVersion}.${scalaMinorVersion}" - - compileOnly group: "org.apache.spark", name: "spark-core_${scalaMajorVersion}", version: "${apacheSparkVersion}" - compileOnly group: "org.apache.spark", name: "spark-sql_${scalaMajorVersion}", version: "${apacheSparkVersion}" - implementation group: "org.apache.spark", name: "spark-sql-kafka-0-10_${scalaMajorVersion}", version: "${apacheSparkVersion}" - - testImplementation group: "org.scalatest", name: "scalatest_${scalaMajorVersion}", version: "${scalaTestVersion}" -} - -configurations { - testImplementation.extendsFrom compileOnly -} - -task scalaTest(dependsOn: ['testClasses'], type: JavaExec) { - main = 'org.scalatest.tools.Runner' - args = ['-R', 'build/classes/scala/test', '-o'] - classpath = sourceSets.test.runtimeClasspath -} - -test.dependsOn scalaTest - -shadowJar { - mergeServiceFiles() - zip64 true - manifest { - attributes "Main-Class": "${mainClass}" - } - archiveFileName.set("${getArchiveBaseName().get()}-${projectVersion}.${getArchiveExtension().get()}") -} - -clean.doFirst { - delete "logs/" -} \ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..dc72a4a --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,56 @@ +plugins { + scala +} + +project.group = "com.barrelsofdata" +project.version = "1.0.0" + +dependencies { + compileOnly(libs.scala.library) + compileOnly(libs.bundles.spark) + + implementation(libs.spark.sql.kafka) + + testImplementation(libs.scala.test) +} + +// https://docs.gradle.org/current/userguide/performance.html +tasks.withType().configureEach { + maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1) +} + +configurations { + implementation { + resolutionStrategy.failOnVersionConflict() + } + testImplementation { + extendsFrom(configurations.compileOnly.get()) + } +} + +tasks.register("scalaTest") { + dependsOn("testClasses") + mainClass = "org.scalatest.tools.Runner" + args = listOf("-R", "build/classes/scala/test", "-o") + jvmArgs = listOf("-Xms128m", "-Xmx512m", "-XX:MetaspaceSize=300m", "-ea", "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED") // https://lists.apache.org/thread/p1yrwo126vjx5tht82cktgjbmm2xtpw9 + classpath = sourceSets.test.get().runtimeClasspath +} + +tasks.withType { + dependsOn(":scalaTest") +} + +tasks.withType { + manifest { + attributes["Main-Class"] = "com.barrelsofdata.sparkexamples.Driver" + } + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + from (configurations.runtimeClasspath.get().map { if (it.isDirectory()) it else zipTree(it) }) + archiveFileName.set("${archiveBaseName.get()}-${project.version}.${archiveExtension.get()}") +} + +tasks.clean { + doFirst { + delete("logs/") + } +} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties index 12adfdf..6ca7031 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,14 +1,7 @@ -scalaMajorVersion=2.12 -scalaMinorVersion=10 - -apacheSparkVersion=3.0.0 - -scalaTestVersion=3.1.2 - -shadowJarPluginVersion=6.0.0 - -mainClass=com.barrelsofdata.sparkexamples.Driver -projectGroup=com.barrelsofdata.sparkexamples -projectVersion=1.0 - +org.gradle.caching=true +org.gradle.configureondemand=true org.gradle.daemon=false +org.gradle.jvmargs=-Xms256m -Xmx2048m -XX:MaxMetaspaceSize=512m -XX:+UseParallelGC -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 +org.gradle.parallel=true +org.gradle.warning.mode=all +org.gradle.welcome=never \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000..53d7632 --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,14 @@ +[versions] +apache-spark = "3.5.0" +scala = "2.13.8" +scala-test = "3.2.17" + +[libraries] +scala-library = { module = "org.scala-lang:scala-library", version.ref = "scala" } +scala-test = { module = "org.scalatest:scalatest_2.13", version.ref = "scala-test" } +spark-core = { module = "org.apache.spark:spark-core_2.13", version.ref = "apache-spark" } +spark-sql = { module = "org.apache.spark:spark-sql_2.13", version.ref = "apache-spark" } +spark-sql-kafka = { module = "org.apache.spark:spark-sql-kafka-0-10_2.13", version.ref = "apache-spark" } + +[bundles] +spark = ["spark-core", "spark-sql"] \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 62d4c05..7f93135 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index bb8b2fc..3fa8f86 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.5.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip +networkTimeout=10000 +validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index fbd7c51..1aa94a4 100755 --- a/gradlew +++ b/gradlew @@ -1,7 +1,7 @@ -#!/usr/bin/env sh +#!/bin/sh # -# Copyright 2015 the original author or authors. +# Copyright © 2015-2021 the original authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,67 +17,99 @@ # ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## # Attempt to set APP_HOME + # Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum warn () { echo "$*" -} +} >&2 die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar @@ -87,9 +119,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -98,88 +130,120 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin or MSYS, switch paths to Windows format before running java -if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=`expr $i + 1` - done - case $i in - 0) set -- ;; - 1) set -- "$args0" ;; - 2) set -- "$args0" "$args1" ;; - 3) set -- "$args0" "$args1" "$args2" ;; - 4) set -- "$args0" "$args1" "$args2" "$args3" ;; - 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" esac fi -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=`save "$@"` +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. -# Collect all arguments for the java command, following the shell quoting and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat index 5093609..93e3f59 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -14,7 +14,7 @@ @rem limitations under the License. @rem -@if "%DEBUG%" == "" @echo off +@if "%DEBUG%"=="" @echo off @rem ########################################################################## @rem @rem Gradle startup script for Windows @@ -25,7 +25,8 @@ if "%OS%"=="Windows_NT" setlocal set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% @@ -40,7 +41,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init +if %ERRORLEVEL% equ 0 goto execute echo. echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. @@ -54,7 +55,7 @@ goto fail set JAVA_HOME=%JAVA_HOME:"=% set JAVA_EXE=%JAVA_HOME%/bin/java.exe -if exist "%JAVA_EXE%" goto init +if exist "%JAVA_EXE%" goto execute echo. echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% @@ -64,21 +65,6 @@ echo location of your Java installation. goto fail -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - :execute @rem Setup the command line @@ -86,17 +72,19 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/settings.gradle b/settings.gradle deleted file mode 100644 index 1abe6a1..0000000 --- a/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'spark-structured-streaming-wordcount' diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..432dc8d --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,14 @@ +pluginManagement { + repositories { + mavenCentral() + gradlePluginPortal() + } +} + +dependencyResolutionManagement { + repositories { + mavenCentral() + } +} + +rootProject.name = "spark-structured-streaming-wordcount" diff --git a/src/main/scala/com/barrelsofdata/sparkexamples/Driver.scala b/src/main/scala/com/barrelsofdata/sparkexamples/Driver.scala index dc1e5a0..79b730d 100644 --- a/src/main/scala/com/barrelsofdata/sparkexamples/Driver.scala +++ b/src/main/scala/com/barrelsofdata/sparkexamples/Driver.scala @@ -50,7 +50,7 @@ object Driver { def main(args: Array[String]): Unit = { if(args.length != 2) { println("Invalid usage") - println("Usage: spark-submit --master spark-structured-streaming-wordcount-1.0.jar ") + println("Usage: spark-submit --master spark-structured-streaming-wordcount-1.0.0.jar ") LOG.error(s"Invalid number of arguments, arguments given: [${args.mkString(",")}]") System.exit(1) }