diff --git a/Dockerfile b/Dockerfile index 6751394f..721712fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,9 @@ RUN mkdir -p /assets/ && cd /assets && \ curl -OL https://downloads.datastax.com/enterprise/cqlsh-astra.tar.gz && \ tar -xzf ./cqlsh-astra.tar.gz && \ rm ./cqlsh-astra.tar.gz && \ - curl -OL https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3-scala2.13.tgz && \ - tar -xzf ./spark-3.5.3-bin-hadoop3-scala2.13.tgz && \ - rm ./spark-3.5.3-bin-hadoop3-scala2.13.tgz + curl -OL https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3-scala2.13.tgz && \ + tar -xzf ./spark-3.5.4-bin-hadoop3-scala2.13.tgz && \ + rm ./spark-3.5.4-bin-hadoop3-scala2.13.tgz RUN apt-get update && apt-get install -y openssh-server vim python3 --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && \ @@ -44,7 +44,7 @@ RUN chmod +x ./get-latest-maven-version.sh && \ rm -rf "$USER_HOME_DIR/.m2" # Add all migration tools to path -ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.5.3-bin-hadoop3-scala2.13/bin/" +ENV PATH="${PATH}:/assets/dsbulk/bin/:/assets/cqlsh-astra/bin/:/assets/spark-3.5.4-bin-hadoop3-scala2.13/bin/" EXPOSE 22 diff --git a/README.md b/README.md index 1963bff1..6aff1b03 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Migrate and Validate Tables between Origin and Target Cassandra Clusters. > [!IMPORTANT] -> Please note this job has been tested with spark version [3.5.3](https://archive.apache.org/dist/spark/spark-3.5.3/) +> Please note this job has been tested with spark version [3.5.4](https://archive.apache.org/dist/spark/spark-3.5.4/) ## Install as a Container - Get the latest image that includes all dependencies from [DockerHub](https://hub.docker.com/r/datastax/cassandra-data-migrator) @@ -20,14 +20,14 @@ Migrate and Validate Tables between Origin and Target Cassandra Clusters. ### Prerequisite - **Java11** (minimum) as Spark binaries are compiled with it. - **Spark `3.5.x` with Scala `2.13` and Hadoop `3.3`** - - Typically installed using [this binary](https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. This simple setup is recommended for most one-time migrations. + - Typically installed using [this binary](https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3-scala2.13.tgz) on a single VM (no cluster necessary) where you want to run this job. This simple setup is recommended for most one-time migrations. - However we recommend using a Spark Cluster or a Spark Serverless platform like `Databricks` or `Google Dataproc` (that supports the above mentioned versions) for large (e.g. several terabytes) complex migrations OR when CDM is used as a long-term data-transfer utility and not a one-time job. Spark can be installed by running the following: - ``` -wget https://archive.apache.org/dist/spark/spark-3.5.3/spark-3.5.3-bin-hadoop3-scala2.13.tgz -tar -xvzf spark-3.5.3-bin-hadoop3-scala2.13.tgz +wget https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3-scala2.13.tgz +tar -xvzf spark-3.5.4-bin-hadoop3-scala2.13.tgz ``` > [!CAUTION] diff --git a/RELEASE.md b/RELEASE.md index fc598006..149d9498 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,9 @@ # Release Notes +## [5.2.0] - 2025-xx-xx +- Upgraded to use Spark `3.5.4`. +- Cassandra Docker image tag is now set to `cassandra:5`. + ## [5.1.4] - 2024-12-04 - Bug fix: Any run started with a `previousRunId` that is not found in the `cdm_run_info` table (for whatever reason), will be executed as a fresh new run instead of doing nothing. diff --git a/SIT/environment.sh b/SIT/environment.sh index cc7cff5d..d12d424a 100755 --- a/SIT/environment.sh +++ b/SIT/environment.sh @@ -68,7 +68,7 @@ fi ### # These variables are hard-coded for now SUBNET=$(echo ${CIDR} | cut -d. -f1-3) -CASS_VERSION=5.0 +CASS_VERSION=5 CDM_VERSION=latest #============================================================================================================================== # Helper Functions diff --git a/pom.xml b/pom.xml index 71a4ea2f..ee99edf9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ UTF-8 2.13.15 2.13 - 3.5.3 + 3.5.4 3.5.1 5.0-rc1 5.9.1