Skip to content

Commit

Permalink
Merge branch 'release/0.8.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
metasim committed Nov 11, 2019
2 parents a8a8bb7 + 0e42a35 commit 71cead3
Show file tree
Hide file tree
Showing 74 changed files with 1,466 additions and 2,220 deletions.
14 changes: 6 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
sudo: false
dist: xenial
language: python

Expand Down Expand Up @@ -28,11 +27,10 @@ install:
- pip install rasterio shapely pandas numpy pweave
- wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -

script:
- sbt/bin/sbt -java-home $JAVA_HOME -batch test
- sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
# Tricks to avoid unnecessary cache updates
- find $HOME/.sbt -name "*.lock" | xargs rm
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm

jobs:
include:
- stage: "Unit Tests"
script: sbt/bin/sbt -java-home $JAVA_HOME -batch test
- stage: "Integration Tests"
script: sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<img src="docs/src/main/paradox/_template/images/RasterFramesLogo.png" width="300px"/><sup style="vertical-align: top;">&reg;</sup>
<img src="docs/src/main/paradox/_template/assets/images/RasterFramesLogo.png" width="300px"/><sup style="vertical-align: top;">&reg;</sup>

[![Join the chat at https://gitter.im/locationtech/rasterframes](https://badges.gitter.im/locationtech/rasterframes.svg)](https://gitter.im/locationtech/rasterframes?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

RasterFrames® brings together Earth-observation (EO) data access, cloud computing, and DataFrame-based data science. The recent explosion of EO data from public and private satellite operators presents both a huge opportunity as well as a challenge to the data analysis community. It is _Big Data_ in the truest sense, and its footprint is rapidly getting bigger.

RasterFrames provides a DataFrame-centric view over arbitrary raster data, enabling spatiotemporal queries, map algebra raster operations, and compatibility with the ecosystem of Spark ML algorithms. By using DataFrames as the core cognitive and compute data model, it is able to deliver these features in a form that is both accessible to general analysts and scalable along with the rapidly growing data footprint.

<img src="docs/src/main/paradox/RasterFramePipeline.png" width="600px"/>
<img src="pyrasterframes/src/main/python/docs/static/rasterframes-pipeline-nologo.png" width="600px"/>

Please see the [Getting Started](http://rasterframes.io/getting-started.html) section of the Users' Manual to start using RasterFrames.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ package org.locationtech.rasterframes.bench

import java.util.concurrent.TimeUnit

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.BoundReference
import org.apache.spark.sql.rf.TileUDT
import org.locationtech.rasterframes._
import org.locationtech.rasterframes.expressions.generators.ExplodeTiles
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.openjdk.jmh.annotations._

/**
*
* @author sfitch
Expand All @@ -37,32 +36,33 @@ import org.openjdk.jmh.annotations._
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class TileExplodeBench extends SparkEnv {
import spark.implicits._

//@Param(Array("uint8", "uint16ud255", "float32", "float64"))
@Param(Array("uint16ud255"))
@Param(Array("uint8", "uint16ud255", "float32", "float64"))
var cellTypeName: String = _

@Param(Array("256"))
var tileSize: Int = _

@Param(Array("2000"))
@Param(Array("100"))
var numTiles: Int = _

@transient
var tiles: Array[InternalRow] = _

var exploder: ExplodeTiles = _
var tiles: DataFrame = _

@Setup(Level.Trial)
def setupData(): Unit = {
tiles = Array.fill(numTiles)(randomTile(tileSize, tileSize, cellTypeName))
.map(t => InternalRow(TileUDT.tileSerializer.toInternalRow(t)))
val expr = BoundReference(0, TileType, true)
exploder = new ExplodeTiles(1.0, None, Seq(expr))
tiles = Seq.fill(numTiles)(randomTile(tileSize, tileSize, cellTypeName))
.toDF("tile").repartition(10)
}

@Benchmark
def arrayExplode() = {
tiles.select(posexplode(rf_tile_to_array_double($"tile"))).count()
}

@Benchmark
def tileExplode() = {
for(t <- tiles)
exploder.eval(t)
tiles.select(rf_explode_tiles($"tile")).count()
}
}
5 changes: 4 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ lazy val root = project
.withId("RasterFrames")
.aggregate(core, datasource, pyrasterframes, experimental)
.enablePlugins(RFReleasePlugin)
.settings(publish / skip := true)
.settings(
publish / skip := true,
clean := clean.dependsOn(`rf-notebook`/clean).value
)

lazy val `rf-notebook` = project
.dependsOn(pyrasterframes)
Expand Down
63 changes: 29 additions & 34 deletions build/circleci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,40 @@ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/

# most of these libraries required for
# python-pip pandoc && pip install setuptools => required for pyrasterframes testing
RUN sudo apt-get update && \
RUN \
sudo apt-get update && \
sudo apt remove \
python python-minimal python2.7 python2.7-minimal \
libpython-stdlib libpython2.7 libpython2.7-minimal libpython2.7-stdlib \
&& sudo apt-get install -y \
pandoc \
wget \
gcc g++ build-essential \
&& \
sudo apt-get install -y \
pandoc wget \
gcc g++ build-essential bash-completion cmake imagemagick \
libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev \
libcurl4-gnutls-dev \
libproj-dev \
libgeos-dev \
libhdf4-alt-dev \
bash-completion \
cmake \
imagemagick \
libpng-dev \
libffi-dev \
&& sudo apt autoremove \
&& sudo apt-get clean all
# && sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 1
# todo s
liblzma-dev libcurl4-gnutls-dev libproj-dev libgeos-dev libhdf4-alt-dev libpng-dev libffi-dev \
&& \
sudo apt autoremove && \
sudo apt-get clean all

RUN cd /tmp && \
wget https://www.python.org/ftp/python/3.7.4/Python-3.7.4.tgz && \
tar xzf Python-3.7.4.tgz && \
cd Python-3.7.4 && \
./configure --with-ensurepip=install --prefix=/usr/local --enable-optimization && \
make && \
sudo make altinstall && \
rm -rf Python-3.7.4*
RUN \
cd /tmp && \
wget https://www.python.org/ftp/python/3.7.4/Python-3.7.4.tgz && \
tar xzf Python-3.7.4.tgz && \
cd Python-3.7.4 && \
./configure --with-ensurepip=install --prefix=/usr/local --enable-optimization && \
make && \
sudo make altinstall && \
rm -rf Python-3.7.4*

RUN sudo ln -s /usr/local/bin/python3.7 /usr/local/bin/python && \
sudo curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
sudo python get-pip.py && \
sudo pip3 install setuptools ipython==6.2.1
RUN \
sudo ln -s /usr/local/bin/python3.7 /usr/local/bin/python && \
sudo curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
sudo python get-pip.py && \
sudo pip3 install setuptools ipython==6.2.1

# install OpenJPEG
RUN cd /tmp && \
RUN \
cd /tmp && \
wget https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz && \
tar -xf v${OPENJPEG_VERSION}.tar.gz && \
cd openjpeg-${OPENJPEG_VERSION}/ && \
Expand All @@ -56,7 +51,8 @@ RUN cd /tmp && \
cd /tmp && rm -Rf v${OPENJPEG_VERSION}.tar.gz openjpeg*

# Compile and install GDAL with Java bindings
RUN cd /tmp && \
RUN \
cd /tmp && \
wget http://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz && \
tar -xf gdal-${GDAL_VERSION}.tar.gz && \
cd gdal-${GDAL_VERSION} && \
Expand All @@ -73,8 +69,7 @@ RUN cd /tmp && \
--with-threads \
--without-jp2mrsid \
--without-netcdf \
--without-ecw \
&& \
--without-ecw && \
make -j 8 && \
sudo make install && \
sudo ldconfig && \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,22 @@ trait RasterFunctions {
/** Extracts the bounding box from a RasterSource or ProjectedRasterTile */
def rf_extent(col: Column): TypedColumn[Any, Extent] = GetExtent(col)

/** Constructs a XZ2 index in WGS84 from either a Geometry, Extent, ProjectedRasterTile, or RasterSource and its CRS
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
def rf_spatial_index(targetExtent: Column, targetCRS: Column, indexResolution: Short) = XZ2Indexer(targetExtent, targetCRS, indexResolution)

/** Constructs a XZ2 index in WGS84 from either a Geometry, Extent, ProjectedRasterTile, or RasterSource and its CRS
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
def rf_spatial_index(targetExtent: Column, targetCRS: Column) = XZ2Indexer(targetExtent, targetCRS, 18: Short)

/** Constructs a XZ2 index with level 18 resolution in WGS84 from either a ProjectedRasterTile or RasterSource
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
def rf_spatial_index(targetExtent: Column, indexResolution: Short) = XZ2Indexer(targetExtent, indexResolution)

/** Constructs a XZ2 index with level 18 resolution in WGS84 from either a ProjectedRasterTile or RasterSource
* For details: https://www.geomesa.org/documentation/user/datastores/index_overview.html */
def rf_spatial_index(targetExtent: Column) = XZ2Indexer(targetExtent, 18: Short)

/** Extracts the CRS from a RasterSource or ProjectedRasterTile */
def rf_crs(col: Column): TypedColumn[Any, CRS] = GetCRS(col)

Expand Down Expand Up @@ -276,12 +292,38 @@ trait RasterFunctions {
}

/** Where the rf_mask tile contains NODATA, replace values in the source tile with NODATA */
def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
Mask.MaskByDefined(sourceTile, maskTile)
def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = rf_mask(sourceTile, maskTile, false)

/** Where the rf_mask tile contains NODATA, replace values in the source tile with NODATA */
def rf_mask(sourceTile: Column, maskTile: Column, inverse: Boolean=false): TypedColumn[Any, Tile] =
if(!inverse) Mask.MaskByDefined(sourceTile, maskTile)
else Mask.InverseMaskByDefined(sourceTile, maskTile)

/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column, inverse: Boolean=false): TypedColumn[Any, Tile] =
if (!inverse) Mask.MaskByValue(sourceTile, maskTile, maskValue)
else Mask.InverseMaskByValue(sourceTile, maskTile, maskValue)

/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
Mask.MaskByValue(sourceTile, maskTile, maskValue)
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int, inverse: Boolean): TypedColumn[Any, Tile] =
rf_mask_by_value(sourceTile, maskTile, lit(maskValue), inverse)

/** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */
def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int): TypedColumn[Any, Tile] =
rf_mask_by_value(sourceTile, maskTile, maskValue, false)

/** Generate a tile with the values from `data_tile`, but where cells in the `mask_tile` are in the `mask_values`
list, replace the value with NODATA. */
def rf_mask_by_values(sourceTile: Column, maskTile: Column, maskValues: Column): TypedColumn[Any, Tile] =
Mask.MaskByValues(sourceTile, maskTile, maskValues)

/** Generate a tile with the values from `data_tile`, but where cells in the `mask_tile` are in the `mask_values`
list, replace the value with NODATA. */
def rf_mask_by_values(sourceTile: Column, maskTile: Column, maskValues: Seq[Int]): TypedColumn[Any, Tile] = {
import org.apache.spark.sql.functions.array
val valuesCol: Column = array(maskValues.map(lit).toSeq: _*)
rf_mask_by_values(sourceTile, maskTile, valuesCol)
}

/** Where the `maskTile` does **not** contain `NoData`, replace values in the source tile with `NoData` */
def rf_inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
Expand All @@ -291,6 +333,10 @@ trait RasterFunctions {
def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
Mask.InverseMaskByValue(sourceTile, maskTile, maskValue)

/** Where the `maskTile` does **not** equal `maskValue`, replace values in the source tile with `NoData` */
def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Int): TypedColumn[Any, Tile] =
Mask.InverseMaskByValue(sourceTile, maskTile, lit(maskValue))

/** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */
def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] =
withTypedAlias("rf_rasterize", geometry)(
Expand Down Expand Up @@ -389,6 +435,12 @@ trait RasterFunctions {
/** Cellwise inequality comparison between a tile and a scalar. */
def rf_local_unequal[T: Numeric](tileCol: Column, value: T): Column = Unequal(tileCol, value)

/** Test if each cell value is in provided array */
def rf_local_is_in(tileCol: Column, arrayCol: Column) = IsIn(tileCol, arrayCol)

/** Test if each cell value is in provided array */
def rf_local_is_in(tileCol: Column, array: Array[Int]) = IsIn(tileCol, array)

/** Return a tile with ones where the input is NoData, otherwise zero */
def rf_local_no_data(tileCol: Column): Column = Undefined(tileCol)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ package org.locationtech.rasterframes.expressions

import geotrellis.proj4.CRS
import geotrellis.raster.{CellGrid, Tile}
import geotrellis.vector.Extent
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.jts.JTSTypes
import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.locationtech.jts.geom.Envelope
import org.locationtech.rasterframes.encoders.CatalystSerializer._
import org.locationtech.rasterframes.model.{LazyCRS, TileContext}
import org.locationtech.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
Expand Down Expand Up @@ -94,6 +97,15 @@ object DynamicExtractors {
(v: Any) => v.asInstanceOf[InternalRow].to[CRS]
}

lazy val extentLikeExtractor: PartialFunction[DataType, Any Extent] = {
case t if org.apache.spark.sql.rf.WithTypeConformity(t).conformsTo(JTSTypes.GeometryTypeInstance) =>
(input: Any) => JTSTypes.GeometryTypeInstance.deserialize(input).getEnvelopeInternal
case t if t.conformsTo[Extent] =>
(input: Any) => input.asInstanceOf[InternalRow].to[Extent]
case t if t.conformsTo[Envelope] =>
(input: Any) => Extent(input.asInstanceOf[InternalRow].to[Envelope])
}

sealed trait TileOrNumberArg
sealed trait NumberArg extends TileOrNumberArg
case class TileArg(tile: Tile, ctx: Option[TileContext]) extends TileOrNumberArg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ object CellStatsAggregate {
import org.locationtech.rasterframes.encoders.StandardEncoders.cellStatsEncoder

def apply(col: Column): TypedColumn[Any, CellStatistics] =
new Column(new CellStatsAggregateUDAF(col.expr))
.as(s"rf_agg_stats($col)") // node renaming in class doesn't seem to propogate
new CellStatsAggregate()(ExtractTile(col))
.as(s"rf_agg_stats($col)")
.as[CellStatistics]

/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ object HistogramAggregate {
import org.locationtech.rasterframes.encoders.StandardEncoders.cellHistEncoder

def apply(col: Column): TypedColumn[Any, CellHistogram] =
new Column(new HistogramAggregateUDAF(col.expr))
.as(s"rf_agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate
new HistogramAggregate()(ExtractTile(col))
.as(s"rf_agg_approx_histogram($col)")
.as[CellHistogram]

/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ object LocalCountAggregate {
object LocalDataCellsUDAF {
def apply(child: Expression): LocalDataCellsUDAF = new LocalDataCellsUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] =
new Column(new LocalDataCellsUDAF(tile.expr))
new LocalCountAggregate(true)(ExtractTile(tile))
.as(s"rf_agg_local_data_cells($tile)")
.as[Tile]
}
Expand All @@ -107,7 +107,7 @@ object LocalCountAggregate {
object LocalNoDataCellsUDAF {
def apply(child: Expression): LocalNoDataCellsUDAF = new LocalNoDataCellsUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] =
new Column(new LocalNoDataCellsUDAF(tile.expr))
new LocalCountAggregate(false)(ExtractTile(tile))
.as(s"rf_agg_local_no_data_cells($tile)")
.as[Tile]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class LocalStatsAggregate() extends UserDefinedAggregateFunction {
object LocalStatsAggregate {

def apply(col: Column): TypedColumn[Any, LocalCellStatistics] =
new Column(LocalStatsAggregateUDAF(col.expr))
new LocalStatsAggregate()(ExtractTile(col))
.as(s"rf_agg_local_stats($col)")
.as[LocalCellStatistics]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ object LocalTileOpAggregate {
}
object LocalMinUDAF {
def apply(child: Expression): LocalMinUDAF = new LocalMinUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalMinUDAF(tile.expr)).as[Tile]
def apply(tile: Column): TypedColumn[Any, Tile] =
new LocalTileOpAggregate(BiasedMin)(ExtractTile(tile))
.as(s"rf_agg_local_min($tile)")
.as[Tile]
}

@ExpressionDescription(
Expand All @@ -95,6 +98,9 @@ object LocalTileOpAggregate {
}
object LocalMaxUDAF {
def apply(child: Expression): LocalMaxUDAF = new LocalMaxUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalMaxUDAF(tile.expr)).as[Tile]
def apply(tile: Column): TypedColumn[Any, Tile] =
new LocalTileOpAggregate(BiasedMax)(ExtractTile(tile))
.as(s"rf_agg_local_max($tile)")
.as[Tile]
}
}
Loading

0 comments on commit 71cead3

Please sign in to comment.