Skip to content

Commit

Permalink
Spark 3.4 (#611)
Browse files Browse the repository at this point in the history
* Spark 3.4, multi build CI

* escape var

* update frameless, remove 3.4.0 from CI for now

* change block var

* fix reference

* don't use anchors

* use spark version in Jar name

* comment out tests

* add poetry.lock

* don't continue on error

* update poetry lock

* update frameless

* add tests back in

* change ulimit

* set timeZone

* support pyspark 3.2-3.4

* add env var for tests

---------

Co-authored-by: Thomas Maschler <[email protected]>
Co-authored-by: Grigory <[email protected]>
  • Loading branch information
3 people authored Oct 4, 2023
1 parent efc1bac commit 44f9bb4
Show file tree
Hide file tree
Showing 12 changed files with 1,633 additions and 1,302 deletions.
5 changes: 5 additions & 0 deletions .github/actions/init-python-env/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ inputs:
poetry_version:
description: 'Version of Poetry to configure'
default: '1.3.2'
spark_version:
description: 'Version of Spark to configure'
default: '3.4.0'

runs:
using: "composite"
Expand Down Expand Up @@ -36,5 +39,7 @@ runs:

- name: Install Poetry project dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
env:
SPARK_VERSION: ${{ inputs.spark_version }}
shell: bash
run: make init-python
88 changes: 80 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ jobs:
build-scala:
runs-on: ubuntu-20.04

strategy:
matrix:
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
Expand All @@ -25,22 +32,30 @@ jobs:
uses: ./.github/actions/init-scala-env

- name: Compile Scala Project
env:
SPARK_VERSION: ${{ matrix.spark_version }}
run: make compile-scala

- name: Test Scala Project
# python/* branches are not supposed to change scala code, trust them
if: ${{ !startsWith(github.event.inputs.from_branch, 'python/') }}
run: make test-scala
env:
SPARK_VERSION: ${{ matrix.spark_version }}
run: |
  ulimit -c unlimited
  make test-scala

- name: Build Spark Assembly
env:
SPARK_VERSION: ${{ matrix.spark_version }}
shell: bash
run: make build-scala

- name: Cache Spark Assembly
uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ github.sha }}
key: dist-${{ matrix.spark_version }}-${{ github.sha }}

build-python:
# scala/* branches are not supposed to change python code, trust them
Expand All @@ -50,7 +65,13 @@ jobs:

strategy:
matrix:
python: [ "3.8" ]
python:
- "3.8"
- "3.9"
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
Expand All @@ -61,6 +82,7 @@ jobs:
- uses: ./.github/actions/init-python-env
with:
python_version: ${{ matrix.python }}
spark_version: ${{ matrix.spark_version }}

- name: Static checks
shell: bash
Expand All @@ -69,18 +91,27 @@ jobs:
- uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ github.sha }}
key: dist-${{ matrix.spark_version }}-${{ github.sha }}

- name: Run tests
env:
SPARK_VERSION: ${{ matrix.spark_version }}
shell: bash
run: make test-python-quick

publish:
name: Publish Artifacts
publish-scala:
name: Publish Scala Artifacts
needs: [ build-scala, build-python ]
runs-on: ubuntu-20.04
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')

strategy:
matrix:
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
Expand All @@ -94,17 +125,58 @@ jobs:
shell: bash
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
SPARK_VERSION: ${{ matrix.spark_version }}
run: make publish-scala

- name: Build Spark Assembly
env:
SPARK_VERSION: ${{ matrix.spark_version }}
shell: bash
run: make build-scala

- name: Cache Spark Assembly
uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ matrix.spark_version }}-${{ github.ref }}


publish-python:
name: Publish Python Artifacts
needs: [ publish-scala ]
runs-on: ubuntu-20.04
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')

strategy:
matrix:
python:
- "3.8"
- "3.9"
spark_version:
- "3.2.4"
- "3.3.2"
- "3.4.0"

steps:
- name: Checkout Repository
uses: actions/checkout@v3
with:
fetch-depth: 0

- uses: ./.github/actions/init-python-env
with:
python_version: "3.8"
python_version: ${{ matrix.python }}
spark_version: ${{ matrix.spark_version }}

- uses: actions/cache@v3
with:
path: ./dist/*
key: dist-${{ matrix.spark_version }}-${{ github.ref }}

- name: Build Python whl
shell: bash
run: make build-python


# TODO: Where does this go, do we need it?
# - name: upload artefacts
# uses: ./.github/actions/upload_artefacts
Expand Down
34 changes: 23 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
SHELL := /usr/bin/env bash
SHELL := env SPARK_VERSION=$(SPARK_VERSION) /usr/bin/env bash
SPARK_VERSION ?= 3.4.0

.PHONY: init test lint build docs notebooks help

DIST_DIR = ./dist

help:
@echo "init - Setup the repository"
@echo "clean - clean all compiled python files, build artifacts and virtual envs. Run \`make init\` anew afterwards."
Expand All @@ -18,27 +21,32 @@ test: test-scala test-python
###############

compile-scala:
sbt -v -batch compile test:compile it:compile
sbt -v -batch compile test:compile it:compile -DrfSparkVersion=${SPARK_VERSION}

test-scala: test-core-scala test-datasource-scala test-experimental-scala

test-core-scala:
sbt -batch core/test
sbt -batch core/test -DrfSparkVersion=${SPARK_VERSION}

test-datasource-scala:
sbt -batch datasource/test
sbt -batch datasource/test -DrfSparkVersion=${SPARK_VERSION}

test-experimental-scala:
sbt -batch experimental/test
sbt -batch experimental/test -DrfSparkVersion=${SPARK_VERSION}

build-scala: clean-build-scala
sbt "pyrasterframes/assembly" -DrfSparkVersion=${SPARK_VERSION}

build-scala:
sbt "pyrasterframes/assembly"
clean-build-scala:
if [ -d "$(DIST_DIR)" ]; then \
find ./dist -name 'pyrasterframes-assembly-${SPARK_VERSION}*.jar' -exec rm -fr {} +; \
fi

clean-scala:
sbt clean
sbt clean -DrfSparkVersion=${SPARK_VERSION}

publish-scala:
sbt publish
sbt publish -DrfSparkVersion=${SPARK_VERSION}

################
# PYTHON
Expand All @@ -49,9 +57,11 @@ init-python:
./.venv/bin/python -m pip install --upgrade pip
poetry self add "poetry-dynamic-versioning[plugin]"
poetry install
poetry add pyspark@${SPARK_VERSION}
poetry run pre-commit install

test-python: build-scala
poetry add pyspark@${SPARK_VERSION}
poetry run pytest -vv python/tests --cov=python/pyrasterframes --cov=python/geomesa_pyspark --cov-report=term-missing

test-python-quick:
Expand All @@ -72,8 +82,10 @@ notebooks-python: clean-notebooks-python
clean-python: clean-build-python clean-test-python clean-venv-python clean-docs-python clean-notebooks-python

clean-build-python:
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +
if [ -d "$(DIST_DIR)" ]; then \
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +; \
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +; \
fi

clean-test-python:
rm -f .coverage
Expand Down
9 changes: 6 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ThisBuild / dynverSonatypeSnapshots := true
ThisBuild / publishMavenStyle := true
ThisBuild / Test / publishArtifact := false


addCommandAlias("makeSite", "docs/makeSite")
addCommandAlias("previewSite", "docs/previewSite")
addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
Expand All @@ -38,13 +39,15 @@ lazy val IntegrationTest = config("it") extend Test
lazy val root = project
.withId("RasterFrames")
.aggregate(core, datasource)
.settings(publish / skip := true)
.settings(
publish / skip := true)

lazy val `rf-notebook` = project
.dependsOn(pyrasterframes)
.disablePlugins(CiReleasePlugin)
.enablePlugins(RFAssemblyPlugin, DockerPlugin)
.settings(publish / skip := true)
.settings(
publish / skip := true)

lazy val core = project
.enablePlugins(BuildInfoPlugin)
Expand Down Expand Up @@ -79,7 +82,7 @@ lazy val core = project
ExclusionRule(organization = "com.github.mpilquist")
),
scaffeine,
sparktestingbase excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
sparktestingbase().value % Test excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
`scala-logging`
),
libraryDependencies ++= {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class SlippyDataSourceSpec extends TestEnvironment with TestData with BeforeAndA

def tileFilesCount(dir: File): Long = {
val r = countFiles(dir, ".png")
println(dir, r)
r
}

Expand Down
Loading

0 comments on commit 44f9bb4

Please sign in to comment.