diff --git a/exploratory_analyses/descriptive_statistics/index.html b/exploratory_analyses/descriptive_statistics/index.html
index becac991..6acbab71 100644
--- a/exploratory_analyses/descriptive_statistics/index.html
+++ b/exploratory_analyses/descriptive_statistics/index.html
@@ -1687,7 +1747,8 @@

     Generate descriptive statistics from raster data.
 
-    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
+    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
+    Nodata values are removed from the data before the statistics are computed.
@@ -1759,11 +1820,15 @@
 @beartype
 def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:
     """Generate descriptive statistics from raster data.
 
     Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
+    Nodata values are removed from the data before the statistics are computed.
 
     Args:
         input_data: Data to generate descriptive statistics from.
@@ -1772,6 +1837,8 @@
         The descriptive statistics in previously described order.
     """
     data = input_data.read().flatten()
+    nodata_value = input_data.nodata
+    data = data[data != nodata_value]
     statistics = _descriptive_statistics(data)
     return statistics
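A minimal usage sketch of the updated function; the raster path is hypothetical:

```python
import rasterio

from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_raster

# Open a raster; the function flattens all bands into a single array.
with rasterio.open("data/elevation.tif") as raster:  # hypothetical path
    stats = descriptive_statistics_raster(raster)

# Cells equal to the raster's nodata value are now dropped before computing
# min, max, mean, quantiles, standard deviation and skewness.
print(stats)
```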

diff --git a/exploratory_analyses/statistical_testing/index.html b/exploratory_analyses/statistical_testing/index.html
index b7d6d359..d1eabdb9 100644
--- a/exploratory_analyses/statistical_testing/index.html
+++ b/exploratory_analyses/statistical_testing/index.html
@@ -1634,7 +1694,7 @@

     columns
-    Sequence[str]
+    Optional[Sequence[str]]
@@ -1750,7 +1810,7 @@
 @beartype
-def chi_square_test(data: pd.DataFrame, target_column: str, columns: Sequence[str] = None) -> dict:
+def chi_square_test(data: pd.DataFrame, target_column: str, columns: Optional[Sequence[str]] = None) -> dict:
     """Compute Chi-square test for independence on the input data.
 
     It is assumed that the variables in the input data are independent and that they are categorical, i.e. strings,
@@ -1771,7 +1831,7 @@
     if check_empty_dataframe(data):
         raise exceptions.EmptyDataFrameException("The input Dataframe is empty.")
 
-    if not check_columns_valid(data, target_column):
+    if not check_columns_valid(data, [target_column]):
         raise exceptions.InvalidParameterValueException("Target column not found in the Dataframe.")
 
     if columns is not None:
@@ -1994,7 +2054,7 @@
             "The argument min_periods is available only with correlation methods 'pearson' and 'spearman'."
         )
 
-    matrix = data.corr(method=correlation_method, min_periods=min_periods)
+    matrix = data.corr(method=correlation_method, min_periods=min_periods, numeric_only=True)
 
     return matrix
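A short sketch of how the changed signatures behave; the DataFrame and column names are illustrative:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import chi_square_test, correlation_matrix

df = pd.DataFrame(
    {
        "deposit": [1, 0, 1, 0, 1, 0],
        "lithology": ["A", "B", "A", "A", "B", "B"],
        "grade": [0.3, 0.1, 0.4, 0.2, 0.5, 0.1],
    }
)

# columns is now Optional; omitting it tests every other column against the target.
stats = chi_square_test(df, target_column="deposit", columns=["lithology"])

# With numeric_only=True, non-numeric columns such as "lithology" are excluded
# from the correlation matrix instead of raising an error.
corr = correlation_matrix(df)
```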

diff --git a/objects.inv b/objects.inv
index 42c0bc96..45dc0b85 100644
Binary files a/objects.inv and b/objects.inv differ

diff --git a/raster_processing/unique_combinations/index.html b/raster_processing/unique_combinations/index.html
new file mode 100644
index 00000000..c08ef177
--- /dev/null
+++ b/raster_processing/unique_combinations/index.html
@@ -0,0 +1,1802 @@
+Unique combinations in rasters - EIS Toolkit
+unique_combinations(raster_list)
+
+Get combinations of raster values between rasters.
+
+All bands in all rasters are used for analysis.
+The first band of the first raster is used for reference when making the output.
+
+Parameters:
+
+Name         Type                     Description                                    Default
+raster_list  Sequence[DatasetReader]  Rasters to be used for finding combinations.  required
+
+Returns:
+
+Name       Type     Description
+out_image  ndarray  Combinations of rasters.
+out_meta   dict     The metadata of the first raster in raster_list.
+
+Source code in eis_toolkit/raster_processing/unique_combinations.py
+@beartype
    +def unique_combinations(  # type: ignore[no-any-unimported]
    +    raster_list: Sequence[rasterio.io.DatasetReader],
    +) -> Tuple[np.ndarray, dict]:
    +    """Get combinations of raster values between rasters.
    +
    +    All bands in all rasters are used for analysis.
    +    The first band of the first raster is used for reference when making the output.
    +
    +    Args:
    +        raster_list: Rasters to be used for finding combinations.
    +
    +    Returns:
    +        out_image: Combinations of rasters.
    +        out_meta: The metadata of the first raster in raster_list.
    +    """
    +    bands = []
    +    out_meta = raster_list[0].meta
    +    out_meta["count"] = 1
    +
    +    for raster in raster_list:
    +        for band in range(1, raster.count + 1):
    +            bands.append(raster.read(band))
    +
    +    if len(bands) == 1:
    +        raise InvalidParameterValueException("Expected to have more bands than 1")
    +
    +    if check_raster_grids(raster_list) is not True:
    +        raise InvalidParameterValueException("Expected raster grids to be of same shape")
    +
    +    out_image = _unique_combinations(bands)
    +    return out_image, out_meta
\ No newline at end of file
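A usage sketch for the new function, assuming two rasters on the same grid (paths hypothetical) and a 2-D output array:

```python
import rasterio

from eis_toolkit.raster_processing.unique_combinations import unique_combinations

# Rasters must share the same grid; all bands of every raster are used.
with rasterio.open("geology.tif") as geology, rasterio.open("soil.tif") as soil:  # hypothetical paths
    out_image, out_meta = unique_combinations([geology, soil])

# out_meta is the first raster's metadata with the band count set to 1,
# so it can be reused to write the combination raster.
with rasterio.open("combinations.tif", "w", **out_meta) as dst:
    dst.write(out_image, 1)  # assumes out_image is a 2-D array
```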
diff --git a/search/search_index.json b/search/search_index.json
index 9fa1bb68..b6009a75 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"General","text":"

    This is the documentation site of the eis_toolkit python package. Here you can find documentation for each module. The documentation is automatically generated from docstrings.

Development of eis_toolkit is related to the EIS Horizon EU project.

    "},{"location":"dependency_licenses/","title":"Dependency licenses","text":"Name Version License protobuf 3.19.4 3-Clause BSD License tensorboard-plugin-wit 1.8.1 Apache 2.0 absl-py 1.2.0 Apache Software License flatbuffers 1.12 Apache Software License ghp-import 2.1.0 Apache Software License google-auth 2.11.0 Apache Software License google-auth-oauthlib 0.4.6 Apache Software License google-pasta 0.2.0 Apache Software License grpcio 1.48.1 Apache Software License importlib-metadata 4.12.0 Apache Software License keras 2.9.0 Apache Software License libclang 14.0.6 Apache Software License requests 2.28.1 Apache Software License rsa 4.9 Apache Software License tenacity 8.2.2 Apache Software License tensorboard 2.9.1 Apache Software License tensorboard-data-server 0.6.1 Apache Software License tensorflow 2.9.2 Apache Software License tensorflow-estimator 2.9.0 Apache Software License tensorflow-io-gcs-filesystem 0.26.0 Apache Software License watchdog 2.1.9 Apache Software License packaging 21.3 Apache Software License; BSD License python-dateutil 2.8.2 Apache Software License; BSD License affine 2.3.1 BSD cligj 0.7.2 BSD geopandas 0.11.1 BSD Fiona 1.8.21 BSD License Jinja2 3.1.2 BSD License Markdown 3.3.7 BSD License MarkupSafe 2.1.1 BSD License Pygments 2.13.0 BSD License Shapely 1.8.4 BSD License Werkzeug 2.2.2 BSD License astunparse 1.6.3 BSD License click 8.1.3 BSD License click-plugins 1.1.1 BSD License cycler 0.11.0 BSD License gast 0.4.0 BSD License h5py 3.7.0 BSD License idna 3.3 BSD License joblib 1.1.0 BSD License kiwisolver 1.4.4 BSD License mkdocs 1.3.1 BSD License numpy 1.23.2 BSD License oauthlib 3.2.0 BSD License pandas 1.4.4 BSD License patsy 0.5.2 BSD License pyasn1 0.4.8 BSD License pyasn1-modules 0.2.8 BSD License rasterio 1.3.2 BSD License requests-oauthlib 1.3.1 BSD License scikit-learn 1.1.2 BSD License scipy 1.9.1 BSD License statsmodels 0.13.2 BSD License threadpoolctl 3.1.0 BSD License wrapt 1.14.1 BSD License eis-toolkit 0.1.0 European Union Public Licence 1.2 (EUPL 1.2) Pillow 9.2.0 Historical Permission Notice and Disclaimer (HPND) opt-einsum 3.3.0 MIT snuggs 1.4.7 MIT GDAL 3.4.3 MIT License Keras-Preprocessing 1.1.2 MIT License PyYAML 6.0 MIT License attrs 22.1.0 MIT License cachetools 5.2.0 MIT License charset-normalizer 2.1.1 MIT License fonttools 4.37.1 MIT License mergedeep 1.3.4 MIT License mkdocs-material 8.4.2 MIT License mkdocs-material-extensions 1.0.3 MIT License munch 2.5.0 MIT License plotly 5.14.0 MIT License pymdown-extensions 9.5 MIT License pyparsing 3.0.9 MIT License pyproj 3.3.1 MIT License pytz 2022.2.1 MIT License pyyaml_env_tag 0.1 MIT License setuptools-scm 6.4.2 MIT License six 1.16.0 MIT License termcolor 1.1.0 MIT License tomli 2.0.1 MIT License urllib3 1.26.12 MIT License zipp 3.8.1 MIT License certifi 2022.6.15 Mozilla Public License 2.0 (MPL 2.0) matplotlib 3.5.3 Python Software Foundation License typing_extensions 4.3.0 Python Software Foundation License"},{"location":"conversions/csv_to_geodataframe/","title":"Convert csv to geodataframe","text":""},{"location":"conversions/csv_to_geodataframe/#eis_toolkit.conversions.csv_to_geodataframe.csv_to_geodataframe","title":"csv_to_geodataframe(csv, indexes, target_crs)","text":"

    Read CSV file to a GeoDataFrame.

Usage of a single index expects valid WKT geometry. Usage of two indexes expects the POINT feature's X coordinate as the first index and the Y coordinate as the second index.

    Parameters:

    Name Type Description Default csv Path

    Path to the .csv file to be read.

    required indexes Sequence[int]

    Index(es) of the geometry column(s).

    required target_crs int

    Target CRS as an EPSG code.

    required

    Returns:

    Type Description GeoDataFrame

    CSV file read to a GeoDataFrame.

    Source code in eis_toolkit/conversions/csv_to_geodataframe.py
    @beartype\ndef csv_to_geodataframe(\n    csv: Path,\n    indexes: Sequence[int],\n    target_crs: int,\n) -> geopandas.GeoDataFrame:\n    \"\"\"\n    Read CSV file to a GeoDataFrame.\n\n    Usage of single index expects valid WKT geometry.\n    Usage of two indexes expects POINT feature(s) X-coordinate as the first index and Y-coordinate as the second index.\n\n    Args:\n        csv: Path to the .csv file to be read.\n        indexes: Index(es) of the geometry column(s).\n        target_crs: Target CRS as an EPSG code.\n\n    Returns:\n        CSV file read to a GeoDataFrame.\n    \"\"\"\n\n    data_frame = _csv_to_geodataframe(\n        csv=csv,\n        indexes=indexes,\n        target_crs=target_crs,\n    )\n    return data_frame\n
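For instance, reading a CSV of points might look like the following sketch; the path and column indexes are illustrative:

```python
from pathlib import Path

from eis_toolkit.conversions.csv_to_geodataframe import csv_to_geodataframe

# Two indexes: the X coordinate column first, the Y coordinate column second.
gdf = csv_to_geodataframe(
    csv=Path("data/points.csv"),  # hypothetical file
    indexes=[0, 1],
    target_crs=4326,
)
print(gdf.head())
```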
    "},{"location":"conversions/raster_to_dataframe/","title":"Convert raster to dataframe","text":""},{"location":"conversions/raster_to_dataframe/#eis_toolkit.conversions.raster_to_dataframe.raster_to_dataframe","title":"raster_to_dataframe(raster, bands=None, add_coordinates=False)","text":"

    Convert raster to Pandas DataFrame.

If bands are not given, all bands are used for conversion. Selected bands are named based on their index, e.g. band_1, band_2, ..., band_n. If desired, image coordinates (row, col) for each pixel can be written to the dataframe by setting add_coordinates to True.

    Parameters:

    Name Type Description Default raster DatasetReader

    Raster to be converted.

    required bands Optional[Sequence[int]]

    Selected bands from multiband raster. Indexing begins from one. Defaults to None.

    None add_coordinates bool

    Determines if pixel coordinates are written into dataframe. Defaults to False.

    False

    Returns:

    Type Description DataFrame

    Raster converted to a DataFrame.

    Source code in eis_toolkit/conversions/raster_to_dataframe.py
    @beartype\ndef raster_to_dataframe(\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    add_coordinates: bool = False,\n) -> pd.DataFrame:\n    \"\"\"Convert raster to Pandas DataFrame.\n\n    If bands are not given, all bands are used for conversion. Selected bands are named based on their index e.g.,\n    band_1, band_2,...,band_n. If wanted, image coordinates (row, col) for each pixel can be written to\n    dataframe by setting add_coordinates to True.\n\n    Args:\n        raster: Raster to be converted.\n        bands: Selected bands from multiband raster. Indexing begins from one. Defaults to None.\n        add_coordinates: Determines if pixel coordinates are written into dataframe. Defaults to False.\n\n    Returns:\n        Raster converted to a DataFrame.\n    \"\"\"\n\n    data_frame = _raster_to_dataframe(\n        raster=raster,\n        bands=bands,\n        add_coordinates=add_coordinates,\n    )\n    return data_frame\n
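A minimal sketch, assuming a multiband raster at a hypothetical path:

```python
import rasterio

from eis_toolkit.conversions.raster_to_dataframe import raster_to_dataframe

with rasterio.open("data/multiband.tif") as raster:  # hypothetical path
    # Band indexing begins from one; add_coordinates adds row/col per pixel.
    df = raster_to_dataframe(raster, bands=[1, 2], add_coordinates=True)

print(df.columns)  # band_1, band_2 plus the pixel coordinates
```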
    "},{"location":"exploratory_analyses/dbscan/","title":"DBSCAN","text":""},{"location":"exploratory_analyses/dbscan/#eis_toolkit.exploratory_analyses.dbscan.dbscan","title":"dbscan(data, max_distance=0.5, min_samples=5)","text":"

    Perform DBSCAN clustering on the input data.

    Parameters:

    Name Type Description Default data GeoDataFrame

    GeoDataFrame containing the input data.

    required max_distance float

    The maximum distance between two samples for one to be considered as in the neighborhood of the other. Defaults to 0.5.

    0.5 min_samples int

    The number of samples in a neighborhood for a point to be considered as a core point. Defaults to 5.

    5

    Returns:

    Type Description GeoDataFrame

    GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a point is a core point (1) or not (0).

    Raises:

    Type Description EmptyDataFrameException

    The input GeoDataFrame is empty.

    InvalidParameterException

    The maximum distance between two samples in a neighborhood is not greater than zero or the number of samples in a neighborhood is not greater than one.

    Source code in eis_toolkit/exploratory_analyses/dbscan.py
    @beartype\ndef dbscan(data: gdp.GeoDataFrame, max_distance: float = 0.5, min_samples: int = 5) -> gdp.GeoDataFrame:\n    \"\"\"\n    Perform DBSCAN clustering on the input data.\n\n    Args:\n        data: GeoDataFrame containing the input data.\n        max_distance: The maximum distance between two samples for one to be considered as in the neighborhood of\n            the other. Defaults to 0.5.\n        min_samples: The number of samples in a neighborhood for a point to be considered as a core point.\n            Defaults to 5.\n\n    Returns:\n        GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a\n            point is a core point (1) or not (0).\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterException: The maximum distance between two samples in a neighborhood is not greater\n            than zero or the number of samples in a neighborhood is not greater than one.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if max_distance <= 0:\n        raise InvalidParameterValueException(\n            \"The input value for the maximum distance between two samples in a neighborhood must be greater than zero.\"\n        )\n\n    if min_samples <= 1:\n        raise InvalidParameterValueException(\n            \"The input value for the minimum number of samples in a neighborhood must be greater than one.\"\n        )\n\n    dbscan_gdf = _dbscan(data, max_distance, min_samples)\n\n    return dbscan_gdf\n
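A small illustrative run on synthetic points:

```python
import geopandas as gpd
from shapely.geometry import Point

from eis_toolkit.exploratory_analyses.dbscan import dbscan

gdf = gpd.GeoDataFrame(geometry=[Point(0, 0), Point(0.1, 0), Point(0.2, 0.1), Point(5, 5)])

# max_distance bounds the neighborhood; min_samples must be greater than one.
clustered = dbscan(gdf, max_distance=0.5, min_samples=2)
```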
    "},{"location":"exploratory_analyses/descriptive_statistics/","title":"Descriptive statistics","text":""},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_dataframe","title":"descriptive_statistics_dataframe(input_data, column)","text":"

    Generate descriptive statistics from vector data.

    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.

    Parameters:

    Name Type Description Default input_data Union[DataFrame, GeoDataFrame]

    Data to generate descriptive statistics from.

    required column str

    Specify the column to generate descriptive statistics from.

    required

    Returns:

    Type Description dict

    The descriptive statistics in previously described order.

    Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
    @beartype\ndef descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:\n    \"\"\"Generate descriptive statistics from vector data.\n\n    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n\n    Args:\n        input_data: Data to generate descriptive statistics from.\n        column: Specify the column to generate descriptive statistics from.\n\n    Returns:\n        The descriptive statistics in previously described order.\n    \"\"\"\n    if column not in input_data.columns:\n        raise InvalidColumnException\n    data = input_data[column]\n    statistics = _descriptive_statistics(data)\n    return statistics\n
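A minimal sketch with an illustrative one-column DataFrame:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_dataframe

df = pd.DataFrame({"grade": [0.3, 0.1, 0.4, 0.2, 0.5]})

# Returns min, max, mean, the 25/50/75% quantiles, standard deviation,
# relative standard deviation and skewness for the chosen column.
stats = descriptive_statistics_dataframe(df, column="grade")
```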
    "},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_raster","title":"descriptive_statistics_raster(input_data)","text":"

    Generate descriptive statistics from raster data.

    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.

    Parameters:

    Name Type Description Default input_data DatasetReader

    Data to generate descriptive statistics from.

    required

    Returns:

    Type Description dict

    The descriptive statistics in previously described order.

    Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
    @beartype\ndef descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:\n    \"\"\"Generate descriptive statistics from raster data.\n\n    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n\n    Args:\n        input_data: Data to generate descriptive statistics from.\n\n    Returns:\n        The descriptive statistics in previously described order.\n    \"\"\"\n    data = input_data.read().flatten()\n    statistics = _descriptive_statistics(data)\n    return statistics\n
    "},{"location":"exploratory_analyses/feature_importance/","title":"Feature importance","text":""},{"location":"exploratory_analyses/feature_importance/#eis_toolkit.exploratory_analyses.feature_importance.evaluate_feature_importance","title":"evaluate_feature_importance(classifier, x_test, y_test, feature_names, number_of_repetition=50, random_state=0)","text":"

    Evaluate the feature importance of a sklearn classifier or linear model.

    Parameters:

    Name Type Description Default classifier BaseEstimator

    Trained classifier.

    required x_test ndarray

Testing feature data (X data needs to be normalized/standardized).

    required y_test ndarray

    Testing target data.

    required feature_names Sequence[str]

    Names of the feature columns.

    required number_of_repetition int

Number of iterations used when calculating feature importance (default 50).

    50 random_state int

Random state for repeatability of results (default 0).

    0

Return: A dataframe composed of feature names and importance values, and the result object with importance mean, importance std, and overall importance. Raises: InvalidDatasetException: When the dataset is None.

    Source code in eis_toolkit/exploratory_analyses/feature_importance.py
    @beartype\ndef evaluate_feature_importance(\n    classifier: sklearn.base.BaseEstimator,\n    x_test: np.ndarray,\n    y_test: np.ndarray,\n    feature_names: Sequence[str],\n    number_of_repetition: int = 50,\n    random_state: int = 0,\n) -> tuple[pd.DataFrame, dict]:\n    \"\"\"\n    Evaluate the feature importance of a sklearn classifier or linear model.\n\n    Parameters:\n        classifier: Trained classifier.\n        x_test: Testing feature data (X data need to be normalized / standardized).\n        y_test: Testing target data.\n        feature_names: Names of the feature columns.\n        number_of_repetition: Number of iteration used when calculate feature importance (default 50).\n        random_state: random state for repeatability of results (Default 0).\n    Return:\n        A dataframe composed by features name and Importance value\n        The resulted object with importance mean, importance std, and overall importance\n    Raises:\n        InvalidDatasetException: When the dataset is None.\n    \"\"\"\n\n    if x_test is None or y_test is None:\n        raise InvalidDatasetException\n\n    result = permutation_importance(\n        classifier, x_test, y_test.ravel(), n_repeats=number_of_repetition, random_state=random_state\n    )\n\n    feature_importance = pd.DataFrame({\"Feature\": feature_names, \"Importance\": result.importances_mean})\n\n    feature_importance[\"Importance\"] = feature_importance[\"Importance\"] * 100\n    feature_importance = feature_importance.sort_values(by=\"Importance\", ascending=False)\n\n    return feature_importance, result\n
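A small end-to-end sketch with synthetic data; the classifier choice and feature names are illustrative:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

from eis_toolkit.exploratory_analyses.feature_importance import evaluate_feature_importance

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 3))
y = (x[:, 0] > 0).astype(int)

classifier = RandomForestClassifier(random_state=0).fit(x, y)

# Permutation importance over the default 50 repetitions.
importance_df, result = evaluate_feature_importance(classifier, x, y, feature_names=["a", "b", "c"])
```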
    "},{"location":"exploratory_analyses/k_means_cluster/","title":"K-means clustering","text":""},{"location":"exploratory_analyses/k_means_cluster/#eis_toolkit.exploratory_analyses.k_means_cluster.k_means_clustering","title":"k_means_clustering(data, number_of_clusters=None, random_state=None)","text":"

    Perform k-means clustering on the input data.

    Parameters:

    Name Type Description Default data GeoDataFrame

    A GeoDataFrame containing the input data.

    required number_of_clusters Optional[int]

The number of clusters (>= 1) to form. Optional parameter. If not provided, the optimal number of clusters is computed using the elbow method.

    None random_state Optional[int]

A random state for centroid initialization to make the randomness deterministic. Optional parameter.

    None

    Returns:

    Type Description GeoDataFrame

    GeoDataFrame containing assigned cluster labels.

    Raises:

    Type Description EmptyDataFrameException

    The input GeoDataFrame is empty.

    InvalidParameterException

    The number of clusters is less than one.

    Source code in eis_toolkit/exploratory_analyses/k_means_cluster.py
    @beartype\ndef k_means_clustering(\n    data: gdp.GeoDataFrame, number_of_clusters: Optional[int] = None, random_state: Optional[int] = None\n) -> gdp.GeoDataFrame:\n    \"\"\"\n    Perform k-means clustering on the input data.\n\n    Args:\n        data: A GeoDataFrame containing the input data.\n        number_of_clusters: The number of clusters (>= 1) to form. Optional parameter. If not provided,\n            optimal number of clusters is computed using the elbow method.\n        random_state: A random number generation for centroid initialization to make\n            the randomness deterministic. Optional parameter.\n\n    Returns:\n        GeoDataFrame containing assigned cluster labels.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterException: The number of clusters is less than one.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if number_of_clusters is not None and number_of_clusters < 1:\n        raise InvalidParameterValueException(\"The input value for number of clusters must be at least one.\")\n\n    k_means_gdf = _k_means_clustering(data, number_of_clusters, random_state)\n\n    return k_means_gdf\n
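A minimal sketch on synthetic points:

```python
import geopandas as gpd
from shapely.geometry import Point

from eis_toolkit.exploratory_analyses.k_means_cluster import k_means_clustering

gdf = gpd.GeoDataFrame(geometry=[Point(0, 0), Point(1, 1), Point(10, 10), Point(11, 11)])

# Omitting number_of_clusters would let the elbow method choose it.
clustered = k_means_clustering(gdf, number_of_clusters=2, random_state=0)
```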
    "},{"location":"exploratory_analyses/parallel_coordinates/","title":"Plot parallel coordinates","text":""},{"location":"exploratory_analyses/parallel_coordinates/#eis_toolkit.exploratory_analyses.parallel_coordinates.plot_parallel_coordinates","title":"plot_parallel_coordinates(df, color_column_name, plot_title=None, palette_name=None, curved_lines=True)","text":"

    Plot a parallel coordinates plot.

    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only the first 8 to plot.

    Parameters:

    Name Type Description Default df DataFrame

    The DataFrame to plot.

    required color_column_name str

    The name of the column in df to use for color encoding.

    required plot_title Optional[str]

    The title for the plot. Default is None.

    None palette_name Optional[str]

    The name of the color palette to use. Default is None.

    None curved_lines bool

    If True, the plot will have curved instead of straight lines. Default is True.

    True

    Returns:

    Type Description Figure

    A matplotlib figure containing the parallel coordinates plot.

    Raises:

    Type Description EmptyDataFrameException

    Raised when the DataFrame is empty.

    InvalidColumnException

    Raised when the color column is not found in the DataFrame.

    InconsistentDataTypesException

    Raised when the color column has multiple data types.

    Source code in eis_toolkit/exploratory_analyses/parallel_coordinates.py
    @beartype\ndef plot_parallel_coordinates(\n    df: pd.DataFrame,\n    color_column_name: str,\n    plot_title: Optional[str] = None,\n    palette_name: Optional[str] = None,\n    curved_lines: bool = True,\n) -> matplotlib.figure.Figure:\n    \"\"\"Plot a parallel coordinates plot.\n\n    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric\n    to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only\n    the first 8 to plot.\n\n    Args:\n        df: The DataFrame to plot.\n        color_column_name: The name of the column in df to use for color encoding.\n        plot_title: The title for the plot. Default is None.\n        palette_name: The name of the color palette to use. Default is None.\n        curved_lines: If True, the plot will have curved instead of straight lines. Default is True.\n\n    Returns:\n        A matplotlib figure containing the parallel coordinates plot.\n\n    Raises:\n        EmptyDataFrameException: Raised when the DataFrame is empty.\n        InvalidColumnException: Raised when the color column is not found in the DataFrame.\n        InconsistentDataTypesException: Raised when the color column has multiple data types.\n    \"\"\"\n\n    if df.empty:\n        raise exceptions.EmptyDataFrameException(\"The input DataFrame is empty.\")\n\n    if color_column_name not in df.columns:\n        raise exceptions.InvalidColumnException(\n            f\"The provided color column {color_column_name} is not found in the DataFrame.\"\n        )\n\n    df = df.convert_dtypes()\n    df = df.apply(pd.to_numeric, errors=\"ignore\")\n\n    color_data = df[color_column_name].to_numpy()\n    if len(set([type(elem) for elem in color_data])) != 1:\n        raise exceptions.InconsistentDataTypesException(\n            \"The color column should have a consistent datatype. Multiple data types detected in the color column.\"\n        )\n\n    df = df.select_dtypes(include=np.number)\n\n    # Drop non-numeric columns and the column used for coloring\n    columns_to_drop = [color_column_name]\n    for column in df.columns.values:\n        if df[column].isnull().all():\n            columns_to_drop.append(column)\n    df = df.loc[:, ~df.columns.isin(columns_to_drop)]\n\n    # Keep only first 8 columns if more are still present\n    if len(df.columns.values) > 8:\n        df = df.iloc[:, :8]\n\n    data_labels = df.columns.values\n    data = df.to_numpy()\n\n    fig = _plot_parallel_coordinates(\n        data=data,\n        data_labels=data_labels,\n        color_data=color_data,\n        color_column_name=color_column_name,\n        plot_title=plot_title,\n        palette_name=palette_name,\n        curved_lines=curved_lines,\n    )\n    return fig\n
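A minimal sketch; the columns and the output path are illustrative:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.parallel_coordinates import plot_parallel_coordinates

df = pd.DataFrame(
    {
        "class": ["a", "a", "b", "b"],
        "x": [1.0, 2.0, 3.0, 4.0],
        "y": [4.0, 3.0, 2.0, 1.0],
    }
)

# Rows with nulls are dropped and at most 8 numeric columns are plotted.
fig = plot_parallel_coordinates(df, color_column_name="class", plot_title="Example")
fig.savefig("parallel_coordinates.png")  # hypothetical output path
```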
    "},{"location":"exploratory_analyses/pca/","title":"PCA","text":""},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.compute_pca","title":"compute_pca(data, number_of_components, scaler_type='standard', nodata=None, color_column_name=None)","text":"

    Compute given number of principal components for numeric input data.

Various input data formats are accepted, and the output format depends on the input format. If the input is a (Geo)DataFrame, a pairplot is additionally produced. A column name used for coloring can be specified in this case.

    Parameters:

    Name Type Description Default data Union[ndarray, DataFrame, GeoDataFrame, DatasetReader]

    Input data for PCA.

    required number_of_components int

The number of principal components to compute. Should be >= 1 and at most the number of numeric columns if the input is a (Geo)DataFrame, or the number of bands if the input is a raster.

    required scaler_type Literal['standard', 'min_max', 'robust']

    Transform data according to a specified Sklearn scaler. Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".

    'standard' nodata Optional[Number]

    Define nodata value to be masked out. Optional parameter. If None and input is raster, looks for nodata value from raster metadata. Defaults to None.

    None color_column_name Optional[str]

    If input data is a DataFrame or a GeoDataFrame, column name used for coloring data points in the produced pairplot can be defined. Defaults to None.

    None

    Returns:

    Type Description Union[ndarray, Tuple[DataFrame, PairGrid], Tuple[GeoDataFrame, PairGrid], Tuple[ndarray, Profile]]

The computed principal components in the corresponding format as the input data (for raster, the output is a Numpy array containing the data and the raster profile), and the explained variance ratios for each component (ndarray).

    Raises:

    Type Description EmptyDataException

    The input is empty.

    InvalidNumberOfPrincipalComponents

The number of principal components is less than 1, or more than the number of columns if the input was a (Geo)DataFrame.

    Source code in eis_toolkit/exploratory_analyses/pca.py
    @beartype\ndef compute_pca(\n    data: Union[np.ndarray, pd.DataFrame, gpd.GeoDataFrame, rasterio.io.DatasetReader],\n    number_of_components: int,\n    scaler_type: Literal[\"standard\", \"min_max\", \"robust\"] = \"standard\",\n    nodata: Optional[Number] = None,\n    color_column_name: Optional[str] = None,\n) -> Tuple[\n    Union[\n        np.ndarray,\n        Tuple[pd.DataFrame, sns.PairGrid],\n        Tuple[gpd.GeoDataFrame, sns.PairGrid],\n        Tuple[np.ndarray, rasterio.profiles.Profile],\n    ],\n    np.ndarray,\n]:\n    \"\"\"\n    Compute given number of principal components for numeric input data.\n\n    Various input data formats are accepted and the output format depends on the input format. If\n    input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can\n    be specified in this case.\n\n    Args:\n        data: Input data for PCA.\n        number_of_components: The number of principal components to compute Should be >= 1 and at most\n            the number of numeric columns if input is (Geo)DataFrame or number of bands if input is raster.\n        scaler_type: Transform data according to a specified Sklearn scaler.\n            Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".\n        nodata: Define nodata value to be masked out. Optional parameter. If None and input is raster, looks\n            for nodata value from raster metadata. Defaults to None.\n        color_column_name: If input data is a DataFrame or a GeoDataFrame, column name used for\n            coloring data points in the produced pairplot can be defined. Defaults to None.\n\n    Returns:\n        The computed principal components in corresponding format as the input data (for raster, output is\n        Numpy array containing the data and raster profile) and the explained variance ratios for each component.\n\n    Raises:\n        EmptyDataException: The input is empty.\n        InvalidNumberOfPrincipalComponents: The number of principal components is less than 1 or more than\n            number of columns if input was (Geo)DataFrame.\n    \"\"\"\n    if scaler_type not in SCALERS:\n        raise exceptions.InvalidParameterValueException(f\"Invalid scaler. Choose from: {list(SCALERS.keys())}\")\n\n    if number_of_components < 1:\n        raise exceptions.InvalidParameterValueException(\"The number of principal components should be >= 1.\")\n\n    # Get feature matrix (Numpy array) from various input types\n    if isinstance(data, np.ndarray):\n        feature_matrix = data\n        if feature_matrix.ndim == 2:  # Table-like data (assumme it is a DataFrame transformed to Numpy array)\n            feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=False)\n        elif feature_matrix.ndim == 3:  # Assume data represents multiband raster data\n            rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n            feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n        else:\n            raise exceptions.InvalidParameterValueException(\n                f\"Unsupported input data format. 
{feature_matrix.ndim} dimensions detected.\"\n            )\n        if feature_matrix.size == 0:\n            raise exceptions.EmptyDataException(\"Input array is empty.\")\n\n    elif isinstance(data, rasterio.io.DatasetReader):\n        feature_matrix = data.read()\n        if feature_matrix.ndim < 3:\n            raise exceptions.InvalidParameterValueException(\"Input raster should have multiple bands.\")\n        rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n        if nodata is None:\n            nodata = data.nodata\n        feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n\n    elif isinstance(data, pd.DataFrame):\n        df = data.copy()\n        if df.empty:\n            raise exceptions.EmptyDataException(\"Input DataFrame is empty.\")\n        if number_of_components > len(df.columns):\n            raise exceptions.InvalidParameterValueException(\n                \"The number of principal should be at most the number of numeric columns in the input DataFrame.\"\n            )\n        if color_column_name is not None:\n            color_column_data = df[color_column_name]\n\n        if isinstance(data, gpd.GeoDataFrame):\n            geometries = data.geometry\n            crs = data.crs\n            df = df.drop(columns=[\"geometry\"])\n\n        df = df.convert_dtypes()\n        df = df.apply(pd.to_numeric, errors=\"ignore\")\n        df = df.select_dtypes(include=np.number)\n        df = df.astype(dtype=np.number)\n        feature_matrix = df.to_numpy()\n        feature_matrix = feature_matrix.astype(float)\n        feature_matrix, nan_mask = _handle_missing_values(feature_matrix, nodata)\n\n    # Core PCA computation\n    principal_components, explained_variances = _compute_pca(feature_matrix, number_of_components, scaler_type)\n\n    # Put nodata back in and consider new dimension of data\n    if nodata is not None:\n        principal_components[nan_mask[:, number_of_components]] = nodata\n    else:\n        principal_components[nan_mask[:, :number_of_components]] = np.nan\n\n    # Convert PCA output to proper format\n    if isinstance(data, np.ndarray):\n        if data.ndim == 3:\n            result_data = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n        else:\n            result_data = principal_components\n\n    elif isinstance(data, rasterio.io.DatasetReader):\n        principal_components = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n        out_profile = data.profile.copy()\n        out_profile[\"count\"] = number_of_components\n        out_profile[\"dtype\"] = \"float32\"\n        result_data = (principal_components, out_profile)\n\n    elif isinstance(data, pd.DataFrame):\n        component_names = [f\"principal_component_{i+1}\" for i in range(number_of_components)]\n        pca_df = pd.DataFrame(data=principal_components, columns=component_names)\n        if color_column_name is not None:\n            pca_df[color_column_name] = color_column_data\n        sns_pair_grid = plot_pca(pca_df, explained_variances, color_column_name)\n        if isinstance(data, gpd.GeoDataFrame):\n            pca_df = gpd.GeoDataFrame(pca_df, geometry=geometries, crs=crs)\n        result_data = (pca_df, sns_pair_grid)\n\n    return result_data, explained_variances\n
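A minimal sketch for the DataFrame case; column names are illustrative:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.pca import compute_pca

df = pd.DataFrame(
    {
        "x": [1.0, 2.0, 3.0, 4.0],
        "y": [2.0, 1.0, 4.0, 3.0],
        "label": ["a", "a", "b", "b"],
    }
)

# For (Geo)DataFrame input the first return value is a (DataFrame, PairGrid)
# pair; the pairplot is colored by the given column.
(pca_df, pair_grid), explained_variances = compute_pca(df, number_of_components=2, color_column_name="label")
```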
    "},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.plot_pca","title":"plot_pca(pca_df, explained_variances=None, color_column_name=None, save_path=None)","text":"

    Plot a scatter matrix of different principal component combinations.

    Parameters:

    Name Type Description Default pca_df DataFrame

    A DataFrame containing computed principal components.

    required explained_variances Optional[ndarray]

    The explained variance ratios for each principal component. Used for labeling axes in the plot. Optional parameter. Defaults to None.

    None color_column_name Optional[str]

    Name of the column that will be used for color-coding data points. Typically a categorical variable in the original data. Optional parameter, no colors if not provided. Defaults to None.

    None save_path Optional[str]

    The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.

    None

    Returns:

    Type Description PairGrid

    A Seaborn pairgrid containing the PCA scatter matrix.

    Raises:

    Type Description InvalidColumnException

    DataFrame does not contain the given color column.

    Source code in eis_toolkit/exploratory_analyses/pca.py
    @beartype\ndef plot_pca(\n    pca_df: pd.DataFrame,\n    explained_variances: Optional[np.ndarray] = None,\n    color_column_name: Optional[str] = None,\n    save_path: Optional[str] = None,\n) -> sns.PairGrid:\n    \"\"\"Plot a scatter matrix of different principal component combinations.\n\n    Args:\n        pca_df: A DataFrame containing computed principal components.\n        explained_variances: The explained variance ratios for each principal component. Used for labeling\n            axes in the plot. Optional parameter. Defaults to None.\n        color_column_name: Name of the column that will be used for color-coding data points. Typically a\n            categorical variable in the original data. Optional parameter, no colors if not provided.\n            Defaults to None.\n        save_path: The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.\n\n    Returns:\n        A Seaborn pairgrid containing the PCA scatter matrix.\n\n    Raises:\n        InvalidColumnException: DataFrame does not contain the given color column.\n    \"\"\"\n\n    if color_column_name and color_column_name not in pca_df.columns:\n        raise exceptions.InvalidColumnException(\"DataFrame does not contain the given color column.\")\n\n    pair_grid = sns.pairplot(pca_df, hue=color_column_name)\n\n    # Add explained variances to axis labels if provided\n    if explained_variances is not None:\n        labels = [f\"PC {i+1} ({var:.1f}%)\" for i, var in enumerate(explained_variances * 100)]\n    else:\n        labels = [f\"PC {i+1}\" for i in range(len(pair_grid.axes))]\n\n    # Iterate over axes objects and set the labels\n    for i, ax_row in enumerate(pair_grid.axes):\n        for j, ax in enumerate(ax_row):\n            if j == 0:  # Only the first column\n                ax.set_ylabel(labels[i], fontsize=\"large\")\n            if i == len(ax_row) - 1:  # Only the last row\n                ax.set_xlabel(labels[j], fontsize=\"large\")\n\n    if save_path is not None:\n        plt.savefig(save_path)\n\n    return pair_grid\n
    "},{"location":"exploratory_analyses/statistical_testing/","title":"Statistical (hypothesis) testing","text":""},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.chi_square_test","title":"chi_square_test(data, target_column, columns=None)","text":"

    Compute Chi-square test for independence on the input data.

    It is assumed that the variables in the input data are independent and that they are categorical, i.e. strings, booleans or integers, but not floats.

    Parameters:

    Name Type Description Default data DataFrame

    Dataframe containing the input data

    required target_column str

    Variable against which independence of other variables is tested.

    required columns Sequence[str]

    Variables that are tested against the variable in target_column. If None, every column is used.

    None

    Raises:

    Type Description EmptyDataFrameException

    The input Dataframe is empty.

    InvalidParameterValueException

    The target_column is not in input Dataframe or invalid column is provided.

    Returns:

    Type Description dict

    Test statistics for each variable (except target_column).

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef chi_square_test(data: pd.DataFrame, target_column: str, columns: Sequence[str] = None) -> dict:\n    \"\"\"Compute Chi-square test for independence on the input data.\n\n    It is assumed that the variables in the input data are independent and that they are categorical, i.e. strings,\n    booleans or integers, but not floats.\n\n    Args:\n        data: Dataframe containing the input data\n        target_column: Variable against which independence of other variables is tested.\n        columns: Variables that are tested against the variable in target_column. If None, every column is used.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: The target_column is not in input Dataframe or invalid column is provided.\n\n    Returns:\n        Test statistics for each variable (except target_column).\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if not check_columns_valid(data, target_column):\n        raise exceptions.InvalidParameterValueException(\"Target column not found in the Dataframe.\")\n\n    if columns is not None:\n        invalid_columns = [column for column in columns if column not in data.columns]\n        if any(invalid_columns):\n            raise exceptions.InvalidParameterValueException(\n                f\"The following variables are not in the dataframe: {invalid_columns}\"\n            )\n    else:\n        columns = data.columns\n\n    statistics = {}\n    for column in columns:\n        if column != target_column:\n            contingency_table = pd.crosstab(data[target_column], data[column])\n            chi_square, p_value, degrees_of_freedom, _ = chi2_contingency(contingency_table)\n            statistics[column] = (chi_square, p_value, degrees_of_freedom)\n\n    return statistics\n
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.correlation_matrix","title":"correlation_matrix(data, correlation_method='pearson', min_periods=None)","text":"

    Compute correlation matrix on the input data.

    It is assumed that the data is numeric, i.e. integers or floats.

    Parameters:

    Name Type Description Default data DataFrame

    Dataframe containing the input data.

    required correlation_method Literal[pearson, kendall, spearman]

    'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.

    'pearson' min_periods Optional[int]

    Minimum number of observations required per pair of columns to have valid result. Optional.

    None

    Raises:

    Type Description EmptyDataFrameException

    The input Dataframe is empty.

    InvalidParameterValueException

    min_periods argument is used with method 'kendall'.

    Returns:

    Type Description DataFrame

    Dataframe containing the correlation matrix

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef correlation_matrix(\n    data: pd.DataFrame,\n    correlation_method: Literal[\"pearson\", \"kendall\", \"spearman\"] = \"pearson\",\n    min_periods: Optional[int] = None,\n) -> pd.DataFrame:\n    \"\"\"Compute correlation matrix on the input data.\n\n    It is assumed that the data is numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n        correlation_method: 'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.\n        min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: min_periods argument is used with method 'kendall'.\n\n    Returns:\n        Dataframe containing the correlation matrix\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if correlation_method == \"kendall\" and min_periods is not None:\n        raise exceptions.InvalidParameterValueException(\n            \"The argument min_periods is available only with correlation methods 'pearson' and 'spearman'.\"\n        )\n\n    matrix = data.corr(method=correlation_method, min_periods=min_periods)\n\n    return matrix\n
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.covariance_matrix","title":"covariance_matrix(data, min_periods=None, delta_degrees_of_freedom=1)","text":"

    Compute covariance matrix on the input data.

    It is assumed that the data is numeric, i.e. integers or floats.

Parameters:
    data (DataFrame): Dataframe containing the input data. Required.
    min_periods (Optional[int]): Minimum number of observations required per pair of columns to have a valid result. Optional, defaults to None.
    delta_degrees_of_freedom (int): Delta degrees of freedom used for computing the covariance matrix. Defaults to 1.

Raises:
    EmptyDataFrameException: The input Dataframe is empty.
    InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.

Returns:
    DataFrame: Dataframe containing the covariance matrix.

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef covariance_matrix(\n    data: pd.DataFrame, min_periods: Optional[int] = None, delta_degrees_of_freedom: int = 1\n) -> pd.DataFrame:\n    \"\"\"Compute covariance matrix on the input data.\n\n    It is assumed that the data is numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n        min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n        delta_degrees_of_freedom: Delta degrees of freedom used for computing covariance matrix. Defaults to 1.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.\n\n    Returns:\n        Dataframe containing the covariance matrix\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if delta_degrees_of_freedom < 0:\n        raise exceptions.InvalidParameterValueException(\"Delta degrees of freedom must be non-negative.\")\n\n    matrix = data.cov(min_periods=min_periods, ddof=delta_degrees_of_freedom)\n\n    return matrix\n
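A minimal usage sketch (data invented):

import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import covariance_matrix

df = pd.DataFrame({"Cu": [0.1, 0.3, 0.2, 0.5], "Au": [1.2, 1.1, 1.4, 1.9]})

# delta_degrees_of_freedom=1 (the default) gives the sample covariance; 0 gives the population covariance.
matrix = covariance_matrix(data=df, delta_degrees_of_freedom=1)
print(matrix)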
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.normality_test","title":"normality_test(data)","text":"

    Compute Shapiro-Wilk test for normality on the input data.

The null hypothesis of the test is that the data is normally distributed. It is assumed that the input data is numeric, i.e. integers or floats.

Parameters:
    data (DataFrame): Dataframe containing the input data. Required.

Returns:
    dict: Test statistics (test statistic and p-value) for each variable.

Raises:
    EmptyDataFrameException: The input Dataframe is empty.

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef normality_test(data: pd.DataFrame) -> dict:\n    \"\"\"Compute Shapiro-Wilk test for normality on the input data.\n\n    It is assumed that the input data is normally distributed and numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n\n    Returns:\n        Test statistics for each variable.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    statistics = {}\n    for column in data.columns:\n        statistic, p_value = shapiro(data[column])\n        statistics[column] = (statistic, p_value)\n\n    return statistics\n
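A minimal usage sketch (random data generated for illustration):

import numpy as np
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import normality_test

rng = np.random.default_rng(42)
df = pd.DataFrame({"a": rng.normal(size=100), "b": rng.uniform(size=100)})

# A small p-value is evidence against normality; here 'b' should fail the test.
for column, (statistic, p_value) in normality_test(data=df).items():
    print(column, statistic, p_value)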
    "},{"location":"prediction/fuzzy_overlay/","title":"Fuzzy overlay","text":""},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.and_overlay","title":"and_overlay(data)","text":"

    Compute an 'and' overlay operation with fuzzy logic.

Parameters:
    data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:
    ndarray: 2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].

Raises:
    InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef and_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute an 'and' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return data.min(axis=0)\n
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.gamma_overlay","title":"gamma_overlay(data, gamma)","text":"

    Compute a 'gamma' overlay operation with fuzzy logic.

Parameters:
    data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.
    gamma (float): The gamma parameter. With gamma value 0, the result is the same as 'product' overlay; as gamma approaches 1, the weight of the 'sum' overlay increases. Value must be in the range [0, 1]. Required.

Returns:
    ndarray: 2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].

Raises:
    InvalidParameterValueException: If data values or gamma are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef gamma_overlay(data: np.ndarray, gamma: float) -> np.ndarray:\n    \"\"\"Compute a 'gamma' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n        gamma: The gamma parameter. With gamma value 0, the result will be the same as 'product' overlay.\n            When gamma is closer to 1, the weight of the 'sum' overlay is increased.\n            Value must be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values or gamma are not in range [0, 1].\n    \"\"\"\n    if gamma < 0 or gamma > 1:\n        raise exceptions.InvalidParameterValueException(\"The gamma parameter must be in range [0, 1]\")\n\n    fuzzy_sum = sum_overlay(data=data)\n    fuzzy_product = product_overlay(data=data)\n    return fuzzy_product ** (1 - gamma) * fuzzy_sum**gamma\n
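A quick sketch of the gamma relationship: with gamma 0 the result equals product_overlay, with gamma 1 it equals sum_overlay (the 2x2x2 membership array is invented):

import numpy as np

from eis_toolkit.prediction.fuzzy_overlay import gamma_overlay, product_overlay, sum_overlay

# Two 2x2 membership "bands", values in [0, 1].
data = np.array([[[0.2, 0.8], [0.5, 0.1]], [[0.6, 0.9], [0.4, 0.3]]])

assert np.allclose(gamma_overlay(data, gamma=0.0), product_overlay(data))
assert np.allclose(gamma_overlay(data, gamma=1.0), sum_overlay(data))
print(gamma_overlay(data, gamma=0.5))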
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.or_overlay","title":"or_overlay(data)","text":"

    Compute an 'or' overlay operation with fuzzy logic.

Parameters:
    data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:
    ndarray: 2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].

Raises:
    InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef or_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute an 'or' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return data.max(axis=0)\n
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.product_overlay","title":"product_overlay(data)","text":"

    Compute a 'product' overlay operation with fuzzy logic.

Parameters:
    data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:
    ndarray: 2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].

Raises:
    InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef product_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute a 'product' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return np.prod(data, axis=0)\n
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.sum_overlay","title":"sum_overlay(data)","text":"

    Compute a 'sum' overlay operation with fuzzy logic.

Parameters:
    data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:
    ndarray: 2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].

Raises:
    InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef sum_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute a 'sum' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return data.sum(axis=0) - np.prod(data, axis=0)\n
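For two bands, the implemented 'sum' overlay (band sum minus band product) reduces to the fuzzy algebraic sum a + b - ab = 1 - (1 - a)(1 - b); a quick check with invented data:

import numpy as np

from eis_toolkit.prediction.fuzzy_overlay import sum_overlay

a = np.array([[0.2, 0.8], [0.5, 0.1]])
b = np.array([[0.6, 0.9], [0.4, 0.3]])

result = sum_overlay(np.stack([a, b]))
assert np.allclose(result, 1 - (1 - a) * (1 - b))
print(result)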
    "},{"location":"prediction/weights_of_evidence/","title":"Weights of evidence","text":""},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_responses","title":"weights_of_evidence_calculate_responses(output_arrays, nr_of_deposits, nr_of_pixels)","text":"

    Calculate the posterior probabilities for the given generalized weight arrays.

Parameters:
    output_arrays (Sequence[Dict[str, ndarray]]): List of output array dictionaries returned by weights of evidence calculations. For each dictionary, the generalized weight and generalized standard deviation arrays are used and summed together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found, the W+ and S_W+ arrays are used instead (i.e. when outputs from unique weight calculations are used with this function). Required.
    nr_of_deposits (int): Number of deposit pixels in the input data for weights of evidence calculations. Required.
    nr_of_pixels (int): Number of evidence pixels in the input data for weights of evidence calculations. Required.

Returns:
    ndarray: Array of posterior probabilities.
    ndarray: Array of standard deviations in the posterior probability calculations.
    ndarray: Array of confidence of the prospectivity values obtained in the posterior probability array.

    Source code in eis_toolkit/prediction/weights_of_evidence.py
    @beartype\ndef weights_of_evidence_calculate_responses(\n    output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int\n) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:\n    \"\"\"Calculate the posterior probabilities for the given generalized weight arrays.\n\n    Args:\n        output_arrays: List of output array dictionaries returned by weights of evidence calculations.\n            For each dictionary, generalized weight and generalized standard deviation arrays are used and summed\n            together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,\n            the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function).\n        nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.\n        nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.\n\n    Returns:\n        Array of posterior probabilites.\n        Array of standard deviations in the posterior probability calculations.\n        Array of confidence of the prospectivity values obtained in the posterior probability array.\n    \"\"\"\n    gen_weights_sum = sum(\n        [\n            item[GENERALIZED_WEIGHT_PLUS_COLUMN]\n            if GENERALIZED_WEIGHT_PLUS_COLUMN in item.keys()\n            else item[WEIGHT_PLUS_COLUMN]\n            for item in output_arrays\n        ]\n    )\n    gen_weights_variance_sum = sum(\n        [\n            np.square(item[GENERALIZED_S_WEIGHT_PLUS_COLUMN])\n            if GENERALIZED_S_WEIGHT_PLUS_COLUMN in item.keys()\n            else np.square(item[WEIGHT_S_PLUS_COLUMN])\n            for item in output_arrays\n        ]\n    )\n\n    prior_probabilities = nr_of_deposits / nr_of_pixels\n    prior_odds = np.log(prior_probabilities / (1 - prior_probabilities))\n    posterior_probabilities = np.exp(gen_weights_sum + prior_odds) / (1 + np.exp(gen_weights_sum + prior_odds))\n\n    posterior_probabilities_squared = np.square(posterior_probabilities)\n    posterior_probabilities_std = np.sqrt(\n        (1 / nr_of_deposits + gen_weights_variance_sum) * posterior_probabilities_squared\n    )\n\n    confidence_array = posterior_probabilities / posterior_probabilities_std\n    return posterior_probabilities, posterior_probabilities_std, confidence_array\n
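A minimal sketch of calling this function with outputs from unique weight calculations; the weight arrays and the deposit/pixel counts are invented, and the "W+"/"S_W+" dictionary keys follow the column names mentioned above:

import numpy as np

from eis_toolkit.prediction.weights_of_evidence import weights_of_evidence_calculate_responses

# Two evidence layers, each with a 2x2 weight array and its standard deviation array.
output_arrays = [
    {"W+": np.array([[0.5, -0.2], [0.1, 0.3]]), "S_W+": np.array([[0.1, 0.1], [0.2, 0.1]])},
    {"W+": np.array([[0.2, 0.4], [-0.1, 0.0]]), "S_W+": np.array([[0.1, 0.2], [0.1, 0.1]])},
]

posterior, posterior_std, confidence = weights_of_evidence_calculate_responses(
    output_arrays, nr_of_deposits=10, nr_of_pixels=10000
)
print(posterior, posterior_std, confidence, sep="\n")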
    "},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_weights","title":"weights_of_evidence_calculate_weights(evidential_raster, deposits, raster_nodata=None, weights_type='unique', studentized_contrast_threshold=1, arrays_to_generate=None)","text":"

    Calculate weights of spatial associations.

Parameters:
    evidential_raster (DatasetReader): The evidential raster. Required.
    deposits (GeoDataFrame): Vector data representing the mineral deposits or occurrences point data. Required.
    raster_nodata (Optional[Number]): Nodata value of the raster, if it should be specified manually. Optional, defaults to None (nodata from raster metadata is used).
    weights_type (Literal['unique', 'categorical', 'ascending', 'descending']): Accepted values are 'unique', 'categorical', 'ascending' and 'descending'. Unique weights do not create generalized classes and do not use a studentized contrast threshold value, while categorical, cumulative ascending and cumulative descending weights do. Categorical weights are calculated so that all classes with studentized contrast below the defined threshold are grouped into one generalized class. Cumulative ascending and descending weights find the class with max contrast and group the classes above/below it into generalized classes. Generalized weights are also calculated for the generalized classes. Defaults to 'unique'.
    studentized_contrast_threshold (Number): Studentized contrast threshold value used with the 'categorical', 'ascending' and 'descending' weight types. Used either as the reclassification threshold directly (categorical) or to check that the class with max contrast has a studentized contrast value of at least the defined value (cumulative). Defaults to 1.
    arrays_to_generate (Optional[Sequence[str]]): Arrays to generate from the computed weight metrics. All column names in the produced weights_df are valid choices. Defaults to ["Class", "W+", "S_W+"] for the 'unique' weights_type and ["Class", "W+", "S_W+", "Generalized W+", "Generalized S_W+"] for the cumulative weight types.

Returns:
    DataFrame: Dataframe with weights of spatial association between the input data.
    dict: Dictionary of arrays for specified metrics.
    dict: Raster metadata.
    int: Number of deposit pixels.
    int: Number of all evidence pixels.

    Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_weights(\n    evidential_raster: rasterio.io.DatasetReader,\n    deposits: gpd.GeoDataFrame,\n    raster_nodata: Optional[Number] = None,\n    weights_type: Literal[\"unique\", \"categorical\", \"ascending\", \"descending\"] = \"unique\",\n    studentized_contrast_threshold: Number = 1,\n    arrays_to_generate: Optional[Sequence[str]] = None,\n) -> Tuple[pd.DataFrame, dict, dict, int, int]:\n    \"\"\"\n    Calculate weights of spatial associations.\n\n    Args:\n        evidential_raster: The evidential raster.\n        deposits: Vector data representing the mineral deposits or occurrences point data.\n        raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None\n            (nodata from raster metadata is used).\n        weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.\n            Unique weights does not create generalized classes and does not use a studentized contrast threshold value\n            while categorical, cumulative ascending and cumulative descending do. Categorical weights are calculated so\n            that all classes with studentized contrast below the defined threshold are grouped into one generalized\n            class. Cumulative ascending and descending weights find the class with max contrast and group classes\n            above/below into generalized classes. Generalized weights are also calculated for generalized classes.\n        studentized_contrast_threshold: Studentized contrast threshold value used with 'categorical', 'ascending' and\n            'descending' weight types. Used either as reclassification threshold directly (categorical) or to check\n            that class with max contrast has studentized contrast value at least the defined value (cumulative).\n            Defaults to 1.\n        arrays_to_generate: Arrays to generate from the computed weight metrics. All column names\n            in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"]\n            for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"]\n            for the cumulative weight types.\n\n    Returns:\n        Dataframe with weights of spatial association between the input data.\n        Dictionary of arrays for specified metrics.\n        Raster metadata.\n        Number of deposit pixels.\n        Number of all evidence pixels.\n    \"\"\"\n\n    if arrays_to_generate is None:\n        if weights_type == \"unique\":\n            metrics_to_arrays = DEFAULT_METRICS_UNIQUE\n        else:\n            metrics_to_arrays = DEFAULT_METRICS_CUMULATIVE\n    else:\n        for col_name in arrays_to_generate:\n            if col_name not in VALID_DF_COLUMNS:\n                raise exceptions.InvalidColumnException(\n                    f\"Arrays to generate contains invalid metric / column name: {col_name}.\"\n                )\n        metrics_to_arrays = arrays_to_generate.copy()\n\n    # 1. Preprocess data\n    evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)\n    raster_meta = evidential_raster.meta\n\n    # Rasterize deposits\n    deposit_array, _ = rasterize_vector(\n        geodataframe=deposits, default_value=1.0, base_raster_profile=raster_meta, fill_value=0.0\n    )\n\n    # Mask NaN out of the array\n    nodata_mask = np.isnan(evidence_array)\n    masked_evidence_array = evidence_array[~nodata_mask]\n    masked_deposit_array = deposit_array[~nodata_mask]\n\n    # 2. WofE calculations\n    if weights_type == \"unique\" or weights_type == \"categorical\":\n        wofe_weights = _unique_weights(masked_deposit_array, masked_evidence_array)\n    elif weights_type == \"ascending\":\n        wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=True)\n    elif weights_type == \"descending\":\n        wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=False)\n    else:\n        raise exceptions.InvalidParameterValueException(\n            \"Expected weights_type to be one of unique, categorical, ascending or descending.\"\n        )\n\n    # 3. Create DataFrame based on calculated metrics\n    df_entries = []\n    for cls, metrics in wofe_weights.items():\n        metrics = [round(metric, 4) if isinstance(metric, np.floating) else metric for metric in metrics]\n        A, _, C, _, w_plus, s_w_plus, w_minus, s_w_minus, contrast, s_contrast, studentized_contrast = metrics\n        df_entries.append(\n            {\n                CLASS_COLUMN: cls,\n                PIXEL_COUNT_COLUMN: A + C,\n                DEPOSIT_COUNT_COLUMN: A,\n                WEIGHT_PLUS_COLUMN: w_plus,\n                WEIGHT_S_PLUS_COLUMN: s_w_plus,\n                WEIGHT_MINUS_COLUMN: w_minus,\n                WEIGHT_S_MINUS_COLUMN: s_w_minus,\n                CONTRAST_COLUMN: contrast,\n                S_CONTRAST_COLUMN: s_contrast,\n                STUDENTIZED_CONTRAST_COLUMN: studentized_contrast,\n            }\n        )\n    weights_df = pd.DataFrame(df_entries)\n\n    # 4. If we use cumulative weights type, calculate generalized classes and weights\n    if weights_type == \"categorical\":\n        weights_df = _generalized_classes_categorical(weights_df, studentized_contrast_threshold)\n        weights_df = _generalized_weights_categorical(weights_df, masked_deposit_array)\n    elif weights_type == \"ascending\" or weights_type == \"descending\":\n        weights_df = _generalized_classes_cumulative(weights_df, studentized_contrast_threshold)\n        weights_df = _generalized_weights_cumulative(weights_df, masked_deposit_array)\n\n    # 5. Generate arrays for desired metrics\n    arrays_dict = _generate_arrays_from_metrics(evidence_array, weights_df, metrics_to_arrays)\n\n    # Return nr. of deposit pixels and nr. of all evidence pixels to be used when calculating responses\n    nr_of_deposits = int(np.sum(masked_deposit_array == 1))\n    nr_of_pixels = int(np.size(masked_evidence_array))\n\n    return weights_df, arrays_dict, raster_meta, nr_of_deposits, nr_of_pixels\n
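A minimal usage sketch with cumulative ascending weights (file paths are hypothetical; the raster and deposit points must share a CRS):

import geopandas as gpd
import rasterio

from eis_toolkit.prediction.weights_of_evidence import weights_of_evidence_calculate_weights

deposits = gpd.read_file("deposits.gpkg")
with rasterio.open("evidence.tif") as evidential_raster:
    weights_df, arrays, raster_meta, nr_of_deposits, nr_of_pixels = weights_of_evidence_calculate_weights(
        evidential_raster, deposits, weights_type="ascending", studentized_contrast_threshold=2
    )
print(weights_df)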
    "},{"location":"raster_processing/check_raster_grids/","title":"Check raster grids","text":""},{"location":"raster_processing/check_raster_grids/#eis_toolkit.raster_processing.check_raster_grids.check_raster_grids","title":"check_raster_grids(rasters, same_extent=False)","text":"

    Check the set of input rasters for matching gridding and optionally matching bounds.

Parameters:
    rasters (List[DatasetReader]): List of rasters to test for matching gridding. Required.
    same_extent (bool): Optional boolean argument that determines if the rasters are also tested for matching bounds. Defaults to False.

Returns:
    bool: True if gridding (and optionally bounds) match, False if not.

    Source code in eis_toolkit/raster_processing/check_raster_grids.py
    def check_raster_grids(  # type: ignore[no-any-unimported]\n    rasters: List[rasterio.io.DatasetReader], same_extent: bool = False\n) -> bool:\n    \"\"\"\n    Check the set of input rasters for matching gridding and optionally matching bounds.\n\n    Args:\n        rasters: List of rasters to test for matching gridding.\n        same_extent: optional boolean argument that determines if rasters are tested for matching bounds.\n            Default set to False.\n\n    Returns:\n        True if gridding and optionally bounds matches, False if not.\n    \"\"\"\n    check = _check_raster_grids(rasters=rasters, same_extent=same_extent)\n    return check\n
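A minimal usage sketch (file paths are hypothetical):

import rasterio

from eis_toolkit.raster_processing.check_raster_grids import check_raster_grids

with rasterio.open("raster_a.tif") as a, rasterio.open("raster_b.tif") as b:
    grids_match = check_raster_grids(rasters=[a, b], same_extent=True)
print(grids_match)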
    "},{"location":"raster_processing/clipping/","title":"Clipping","text":""},{"location":"raster_processing/clipping/#eis_toolkit.raster_processing.clipping.clip_raster","title":"clip_raster(raster, geodataframe)","text":"

    Clips a raster with polygon geometries.

Parameters:
    raster (DatasetReader): The raster to be clipped. Required.
    geodataframe (GeoDataFrame): A geodataframe containing the geometries to do the clipping with. Should contain only polygon features. Required.

Returns:
    ndarray: The clipped raster data.
    dict: The updated metadata.

Raises:
    NonMatchingCrsException: The raster and geodataframe are not in the same CRS.
    NotApplicableGeometryTypeException: The input geometries contain non-polygon features.

    Source code in eis_toolkit/raster_processing/clipping.py
    @beartype\ndef clip_raster(raster: rasterio.io.DatasetReader, geodataframe: geopandas.GeoDataFrame) -> Tuple[np.ndarray, dict]:\n    \"\"\"Clips a raster with polygon geometries.\n\n    Args:\n        raster: The raster to be clipped.\n        geodataframe: A geodataframe containing the geometries to do the clipping with.\n            Should contain only polygon features.\n\n    Returns:\n        The clipped raster data.\n        The updated metadata.\n\n    Raises:\n        NonMatchingCrsException: The raster and geodataframe are not in the same CRS.\n        NotApplicableGeometryTypeException: The input geometries contain non-polygon features.\n    \"\"\"\n    geometries = geodataframe[\"geometry\"]\n\n    if not check_matching_crs(\n        objects=[raster, geometries],\n    ):\n        raise NonMatchingCrsException(\"The raster and geodataframe are not in the same CRS.\")\n\n    if not check_geometry_types(\n        geometries=geometries,\n        allowed_types=[\"Polygon\", \"MultiPolygon\"],\n    ):\n        raise NotApplicableGeometryTypeException(\"The input geometries contain non-polygon features.\")\n\n    out_image, out_meta = _clip_raster(\n        raster=raster,\n        geometries=geometries,\n    )\n\n    return out_image, out_meta\n
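A minimal usage sketch (file paths are hypothetical; the polygons must be in the raster's CRS):

import geopandas as gpd
import rasterio

from eis_toolkit.raster_processing.clipping import clip_raster

polygons = gpd.read_file("area_of_interest.gpkg")
with rasterio.open("input.tif") as raster:
    out_image, out_meta = clip_raster(raster=raster, geodataframe=polygons)

# Write the clipped raster using the updated metadata.
with rasterio.open("clipped.tif", "w", **out_meta) as dst:
    dst.write(out_image)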
    "},{"location":"raster_processing/create_constant_raster/","title":"Create constant raster","text":""},{"location":"raster_processing/create_constant_raster/#eis_toolkit.raster_processing.create_constant_raster.create_constant_raster","title":"create_constant_raster(constant_value, template_raster=None, coord_west=None, coord_north=None, coord_east=None, coord_south=None, target_epsg=None, target_pixel_size=None, raster_width=None, raster_height=None, nodata_value=None)","text":"

    Create a constant raster based on a user-defined value.

Provides 3 methods for raster creation:

1. Set extent and coordinate system based on a template raster.
2. Set extent from origin, based on the western and northern coordinates and the pixel size.
3. Set extent from bounds, based on western, northern, eastern and southern points.

Always provide values for height and width for the last two options, which correspond to the desired number of pixels for rows and columns.

Parameters:
    constant_value (Number): The constant value to use in the raster. Required.
    template_raster (Optional[DatasetReader]): An optional raster to use as a template for the output. Defaults to None.
    coord_west (Optional[Number]): The western coordinate of the output raster in [m]. Defaults to None.
    coord_east (Optional[Number]): The eastern coordinate of the output raster in [m]. Defaults to None.
    coord_south (Optional[Number]): The southern coordinate of the output raster in [m]. Defaults to None.
    coord_north (Optional[Number]): The northern coordinate of the output raster in [m]. Defaults to None.
    target_epsg (Optional[int]): The EPSG code for the output raster. Defaults to None.
    target_pixel_size (Optional[int]): The pixel size of the output raster. Defaults to None.
    raster_width (Optional[int]): The width of the output raster. Defaults to None.
    raster_height (Optional[int]): The height of the output raster. Defaults to None.
    nodata_value (Optional[Number]): The nodata value of the output raster. Defaults to None.

Returns:
    Tuple[ndarray, dict]: A tuple containing the output raster as a NumPy array and updated metadata.

Raises:
    InvalidParameterValueException: Invalid input parameters provided.

    Source code in eis_toolkit/raster_processing/create_constant_raster.py
@beartype\ndef create_constant_raster(  # type: ignore[no-any-unimported]\n    constant_value: Number,\n    template_raster: Optional[rasterio.io.DatasetReader] = None,\n    coord_west: Optional[Number] = None,\n    coord_north: Optional[Number] = None,\n    coord_east: Optional[Number] = None,\n    coord_south: Optional[Number] = None,\n    target_epsg: Optional[int] = None,\n    target_pixel_size: Optional[int] = None,\n    raster_width: Optional[int] = None,\n    raster_height: Optional[int] = None,\n    nodata_value: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Create a constant raster based on a user-defined value.\n\n    Provides 3 methods for raster creation:\n    1. Set extent and coordinate system based on a template raster.\n    2. Set extent from origin, based on the western and northern coordinates and the pixel size.\n    3. Set extent from bounds, based on western, northern, eastern and southern points.\n\n    Always provide values for height and width for the last two options, which correspond to\n    the desired number of pixels for rows and columns.\n\n    Args:\n        constant_value: The constant value to use in the raster.\n        template_raster: An optional raster to use as a template for the output.\n        coord_west: The western coordinate of the output raster in [m].\n        coord_east: The eastern coordinate of the output raster in [m].\n        coord_south: The southern coordinate of the output raster in [m].\n        coord_north: The northern coordinate of the output raster in [m].\n        target_epsg: The EPSG code for the output raster.\n        target_pixel_size: The pixel size of the output raster.\n        raster_width: The width of the output raster.\n        raster_height: The height of the output raster.\n        nodata_value: The nodata value of the output raster.\n\n    Returns:\n        A tuple containing the output raster as a NumPy array and updated metadata.\n\n    Raises:\n        InvalidParameterValueException: Invalid input parameters provided.\n    \"\"\"\n\n    if template_raster is not None:\n        out_array, out_meta = _create_constant_raster_from_template(constant_value, template_raster, nodata_value)\n\n    elif all(coords is not None for coords in [coord_west, coord_east, coord_south, coord_north]):\n        if raster_height <= 0 or raster_width <= 0:\n            raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n        if not check_minmax_position((coord_west, coord_east)) or not check_minmax_position((coord_south, coord_north)):\n            raise InvalidParameterValueException(\"Invalid coordinate values provided.\")\n\n        out_array, out_meta = _create_constant_raster_from_bounds(\n            constant_value,\n            coord_west,\n            coord_north,\n            coord_east,\n            coord_south,\n            target_epsg,\n            raster_width,\n            raster_height,\n            nodata_value,\n        )\n\n    elif all(coords is not None for coords in [coord_west, coord_north]) and all(\n        coords is None for coords in [coord_east, coord_south]\n    ):\n        if raster_height <= 0 or raster_width <= 0:\n            raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n        if target_pixel_size <= 0:\n            raise InvalidParameterValueException(\"Invalid pixel size.\")\n\n        out_array, out_meta = _create_constant_raster_from_origin(\n            constant_value,\n            coord_west,\n            coord_north,\n            target_epsg,\n            target_pixel_size,\n            raster_width,\n            raster_height,\n            nodata_value,\n        )\n\n    else:\n        raise InvalidParameterValueException(\"Suitable parameter values were not provided for any of the 3 methods.\")\n\n    constant_value = cast_scalar_to_int(constant_value)\n    nodata_value = cast_scalar_to_int(out_meta[\"nodata\"])\n\n    if isinstance(constant_value, int) and isinstance(nodata_value, int):\n        target_dtype = np.result_type(get_min_int_type(constant_value), get_min_int_type(nodata_value))\n        out_array = out_array.astype(target_dtype)\n        out_meta[\"dtype\"] = out_array.dtype\n    elif isinstance(constant_value, int) and isinstance(nodata_value, float):\n        out_array = out_array.astype(get_min_int_type(constant_value))\n        out_meta[\"dtype\"] = np.float64.__name__\n    elif isinstance(constant_value, float):\n        out_array = out_array.astype(np.float64)\n        out_meta[\"dtype\"] = out_array.dtype\n\n    return out_array, out_meta\n
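A minimal sketch of method 2 (extent from origin); the coordinates, EPSG code and sizes are invented:

from eis_toolkit.raster_processing.create_constant_raster import create_constant_raster

out_array, out_meta = create_constant_raster(
    constant_value=1,
    coord_west=384744.0,
    coord_north=6671384.0,
    target_epsg=3067,
    target_pixel_size=25,
    raster_width=100,
    raster_height=100,
    nodata_value=-9999,
)
print(out_array.shape, out_meta["dtype"])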
    "},{"location":"raster_processing/extract_values_from_raster/","title":"Extract values from raster","text":""},{"location":"raster_processing/extract_values_from_raster/#eis_toolkit.raster_processing.extract_values_from_raster.extract_values_from_raster","title":"extract_values_from_raster(raster_list, geodataframe, raster_column_names=None)","text":"

    Extract raster values using point data to a DataFrame.

    If custom column names are not given, column names are file_name for singleband files and file_name_bandnumber for multiband files. If custom column names are given, there should be column names for each raster provided in the raster list.

Parameters:
    raster_list (Sequence[DatasetReader]): List of rasters to extract values from. Required.
    geodataframe (GeoDataFrame): Object to extract values with. Required.
    raster_column_names (Optional[Sequence[str]]): List of optional column names for bands. Defaults to None.

Returns:
    DataFrame: Dataframe with x & y coordinates and the values from the raster file(s) as columns.

Raises:
    NonMatchingParameterLengthsException: raster_list and raster_column_names have different lengths.

    Source code in eis_toolkit/raster_processing/extract_values_from_raster.py
    @beartype\ndef extract_values_from_raster(\n    raster_list: Sequence[rasterio.io.DatasetReader],\n    geodataframe: gpd.GeoDataFrame,\n    raster_column_names: Optional[Sequence[str]] = None,\n) -> pd.DataFrame:\n    \"\"\"Extract raster values using point data to a DataFrame.\n\n       If custom column names are not given, column names are file_name for singleband files\n       and file_name_bandnumber for multiband files. If custom column names are given, there\n       should be column names for each raster provided in the raster list.\n\n    Args:\n        raster_list: List to extract values from.\n        geodataframe: Object to extract values with.\n        raster_column_names: List of optional column names for bands.\n\n    Returns:\n        Dataframe with x & y coordinates and the values from the raster file(s) as columns.\n\n    Raises:\n        NonMatchingParameterLengthsException: raster_list and raster_columns_names have different lengths.\n    \"\"\"\n    if raster_column_names == []:\n        raster_column_names = None\n\n    if raster_column_names is not None and len(raster_list) != len(raster_column_names):\n        raise NonMatchingParameterLengthsException(\"Raster list and raster columns names have different lengths.\")\n\n    data_frame = _extract_values_from_raster(\n        raster_list=raster_list, geodataframe=geodataframe, raster_column_names=raster_column_names\n    )\n\n    return data_frame\n
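A minimal usage sketch (file paths and the column name are hypothetical; the points must be in the raster's CRS):

import geopandas as gpd
import rasterio

from eis_toolkit.raster_processing.extract_values_from_raster import extract_values_from_raster

points = gpd.read_file("sample_points.gpkg")
with rasterio.open("geochemistry.tif") as raster:
    df = extract_values_from_raster(raster_list=[raster], geodataframe=points, raster_column_names=["Cu_ppm"])
print(df.head())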
    "},{"location":"raster_processing/reprojecting/","title":"Reprojecting","text":""},{"location":"raster_processing/reprojecting/#eis_toolkit.raster_processing.reprojecting.reproject_raster","title":"reproject_raster(raster, target_crs, resampling_method=warp.Resampling.nearest)","text":"

    Reprojects raster to match given coordinate reference system (EPSG).

Parameters:
    raster (DatasetReader): The raster to be reprojected. Required.
    target_crs (int): Target CRS as EPSG code. Required.
    resampling_method (Resampling): Resampling method. The most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to nearest.

Returns:
    ndarray: The reprojected raster data.
    dict: The updated metadata.

Raises:
    MatchingCrsException: Raster is already in the target CRS.

    Source code in eis_toolkit/raster_processing/reprojecting.py
@beartype\ndef reproject_raster(\n    raster: rasterio.io.DatasetReader, target_crs: int, resampling_method: warp.Resampling = warp.Resampling.nearest\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Reprojects raster to match given coordinate reference system (EPSG).\n\n    Args:\n        raster: The raster to be reprojected.\n        target_crs: Target CRS as EPSG code.\n        resampling_method: Resampling method. Most suitable method depends on the dataset and context.\n            Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.\n\n    Returns:\n        The reprojected raster data.\n        The updated metadata.\n\n    Raises:\n        MatchingCrsException: Raster is already in the target CRS.\n    \"\"\"\n    if target_crs == int(raster.crs.to_string()[5:]):\n        raise MatchingCrsException(\"Raster is already in the target CRS.\")\n\n    out_image, out_meta = _reproject_raster(raster, target_crs, resampling_method)\n\n    return out_image, out_meta\n
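A minimal usage sketch (file path hypothetical; reprojects to EPSG:4326 with bilinear resampling):

import rasterio
from rasterio import warp

from eis_toolkit.raster_processing.reprojecting import reproject_raster

with rasterio.open("input.tif") as raster:
    out_image, out_meta = reproject_raster(raster=raster, target_crs=4326, resampling_method=warp.Resampling.bilinear)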
    "},{"location":"raster_processing/resampling/","title":"Resampling","text":""},{"location":"raster_processing/resampling/#eis_toolkit.raster_processing.resampling.resample","title":"resample(raster, resolution, resampling_method=Resampling.bilinear)","text":"

    Resamples raster according to given resolution.

Parameters:
    raster (DatasetReader): The raster to be resampled. Required.
    resolution (Number): Target resolution, i.e. cell size of the output raster. Required.
    resampling_method (Resampling): Resampling method. The most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to bilinear.

Returns:
    ndarray: The resampled raster data.
    dict: The updated metadata.

Raises:
    NumericValueSignException: Resolution is not a positive value.

    Source code in eis_toolkit/raster_processing/resampling.py
    @beartype\ndef resample(\n    raster: rasterio.io.DatasetReader,\n    resolution: Number,\n    resampling_method: Resampling = Resampling.bilinear,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Resamples raster according to given resolution.\n\n    Args:\n        raster: The raster to be resampled.\n        resolution: Target resolution i.e. cell size of the output raster.\n        resampling_method: Resampling method. Most suitable\n            method depends on the dataset and context. Nearest, bilinear and cubic are some\n            common choices. This parameter defaults to bilinear.\n\n    Returns:\n        The resampled raster data.\n        The updated metadata.\n\n    Raises:\n        NumericValueSignException: Resolution is not a positive value.\n    \"\"\"\n    if resolution <= 0:\n        raise exceptions.NumericValueSignException(f\"Expected a positive value for resolution: {resolution})\")\n\n    out_image, out_meta = _resample(raster, resolution, resampling_method)\n    return out_image, out_meta\n
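A minimal usage sketch (file path and cell size are hypothetical):

import rasterio
from rasterio.enums import Resampling

from eis_toolkit.raster_processing.resampling import resample

with rasterio.open("input.tif") as raster:
    out_image, out_meta = resample(raster=raster, resolution=50, resampling_method=Resampling.bilinear)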
    "},{"location":"raster_processing/snapping/","title":"Snapping","text":""},{"location":"raster_processing/snapping/#eis_toolkit.raster_processing.snapping.snap_with_raster","title":"snap_with_raster(raster, snap_raster)","text":"

    Snaps/aligns raster to given snap raster.

Raster is snapped from its left-bottom corner to the nearest snap raster grid corner in the left-bottom direction. Note that if the rasters are already aligned, a MatchingRasterGridException is raised.

Parameters:
    raster (DatasetReader): The raster to be snapped. Required.
    snap_raster (DatasetReader): The snap raster, i.e. reference grid raster. Required.

Returns:
    ndarray: The snapped raster data.
    dict: The updated metadata.

Raises:
    NonMatchingCrsException: Raster and snap raster are not in the same CRS.
    MatchingRasterGridException: Raster grids are already aligned.

    Source code in eis_toolkit/raster_processing/snapping.py
@beartype\ndef snap_with_raster(raster: rasterio.DatasetReader, snap_raster: rasterio.DatasetReader) -> Tuple[np.ndarray, dict]:\n    \"\"\"Snaps/aligns raster to given snap raster.\n\n    Raster is snapped from its left-bottom corner to the nearest snap raster grid corner in the left-bottom direction.\n    If the rasters are already aligned, a MatchingRasterGridException is raised.\n\n    Args:\n        raster: The raster to be snapped.\n        snap_raster: The snap raster i.e. reference grid raster.\n\n    Returns:\n        The snapped raster data.\n        The updated metadata.\n\n    Raises:\n        NonMatchingCrsException: Raster and snap raster are not in the same CRS.\n        MatchingRasterGridException: Raster grids are already aligned.\n    \"\"\"\n\n    if not check_matching_crs(\n        objects=[raster, snap_raster],\n    ):\n        raise NonMatchingCrsException(\"Raster and snap raster have different CRS.\")\n\n    if snap_raster.bounds.bottom == raster.bounds.bottom and snap_raster.bounds.left == raster.bounds.left:\n        raise MatchingRasterGridException(\"Raster grids are already aligned.\")\n\n    out_image, out_meta = _snap(raster, snap_raster)\n    return out_image, out_meta\n
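A minimal usage sketch (file paths are hypothetical; both rasters must share a CRS and must not already be aligned):

import rasterio

from eis_toolkit.raster_processing.snapping import snap_with_raster

with rasterio.open("input.tif") as raster, rasterio.open("snap_grid.tif") as snap_raster:
    out_image, out_meta = snap_with_raster(raster=raster, snap_raster=snap_raster)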
    "},{"location":"raster_processing/unifying/","title":"Unifying","text":""},{"location":"raster_processing/unifying/#eis_toolkit.raster_processing.unifying.unify_raster_grids","title":"unify_raster_grids(base_raster, rasters_to_unify, resampling_method=Resampling.nearest, same_extent=False)","text":"

    Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.

Parameters:
    base_raster (DatasetReader): The base raster used to determine the target raster grid properties. Required.
    rasters_to_unify (Sequence[DatasetReader]): Rasters to be unified with the base raster. Required.
    resampling_method (Resampling): Resampling method. The most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to nearest.
    same_extent (bool): Whether the unified rasters are forced to have the same extent/bounds as the base raster. Expands smaller rasters with nodata cells. Defaults to False.

Returns:
    List[Tuple[ndarray, dict]]: List of unified rasters' data and metadata. The first element is the base raster.

Raises:
    InvalidParameterValueException: Rasters to unify is empty.

    Source code in eis_toolkit/raster_processing/unifying.py
    @beartype\ndef unify_raster_grids(\n    base_raster: rasterio.io.DatasetReader,\n    rasters_to_unify: Sequence[rasterio.io.DatasetReader],\n    resampling_method: Resampling = Resampling.nearest,\n    same_extent: bool = False,\n) -> List[Tuple[np.ndarray, dict]]:\n    \"\"\"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.\n\n    Args:\n        base_raster: The base raster to determine target raster grid properties.\n        rasters_to_unify: Rasters to be unified with the base raster.\n        resampling_method: Resampling method. Most suitable\n            method depends on the dataset and context. Nearest, bilinear and cubic are some\n            common choices. This parameter defaults to nearest.\n        same_extent: If the unified rasters will be forced to have the same extent/bounds\n            as the base raster. Expands smaller rasters with nodata cells. Defaults to False.\n\n    Returns:\n        List of unified rasters' data and metadata. First element is the base raster.\n\n    Raises:\n        InvalidParameterValueException: Rasters to unify is empty.\n    \"\"\"\n    if len(rasters_to_unify) == 0:\n        raise InvalidParameterValueException(\"Rasters to unify is empty.\")\n\n    out_rasters = _unify_raster_grids(base_raster, rasters_to_unify, resampling_method, same_extent)\n    return out_rasters\n
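A minimal usage sketch (file paths are hypothetical):

import rasterio

from eis_toolkit.raster_processing.unifying import unify_raster_grids

with rasterio.open("base.tif") as base, rasterio.open("other.tif") as other:
    unified = unify_raster_grids(base_raster=base, rasters_to_unify=[other], same_extent=True)

# The first element is the base raster, the rest follow the input order.
(base_image, base_meta), (other_image, other_meta) = unified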
    "},{"location":"raster_processing/windowing/","title":"Windowing","text":""},{"location":"raster_processing/windowing/#eis_toolkit.raster_processing.windowing.extract_window","title":"extract_window(raster, center_coords, height, width)","text":"

    Extract window from raster.

The center coordinate must be inside the raster, but the window can extend outside the raster, in which case the raster nodata value is used for padding.

Parameters:
    raster (DatasetReader): Source raster. Required.
    center_coords (Tuple[Number, Number]): Center coordinates for the window in the form (x, y). The coordinates should be in the raster's CRS. Required.
    height (int): Window height in pixels. Required.
    width (int): Window width in pixels. Required.

Returns:
    ndarray: The extracted raster window.
    dict: The updated metadata.

Raises:
    InvalidParameterValueException: Window size is too small.
    CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.

    Source code in eis_toolkit/raster_processing/windowing.py
@beartype\ndef extract_window(\n    raster: rasterio.io.DatasetReader,\n    center_coords: Tuple[Number, Number],\n    height: int,\n    width: int,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Extract window from raster.\n\n    Center coordinate must be inside the raster but the window can extend outside the raster, in which case\n    padding with the raster nodata value is used.\n\n    Args:\n        raster: Source raster.\n        center_coords: Center coordinates for window in form (x, y). The coordinates should be in the raster's CRS.\n        height: Window height in pixels.\n        width: Window width in pixels.\n\n    Returns:\n        The extracted raster window.\n        The updated metadata.\n\n    Raises:\n        InvalidParameterValueException: Window size is too small.\n        CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.\n    \"\"\"\n\n    if height < 1 or width < 1:\n        raise InvalidParameterValueException(f\"Window size is too small: {height}, {width}.\")\n\n    center_x = center_coords[0]\n    center_y = center_coords[1]\n\n    if (\n        center_x < raster.bounds.left\n        or center_x > raster.bounds.right\n        or center_y < raster.bounds.bottom\n        or center_y > raster.bounds.top\n    ):\n        raise CoordinatesOutOfBoundsException(\"Window center coordinates are out of raster bounds.\")\n\n    out_image, out_meta = _extract_window(raster, center_coords, height, width)\n\n    return out_image, out_meta\n
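A minimal usage sketch (file path and center coordinates are hypothetical; the coordinates are given in the raster's CRS):

import rasterio

from eis_toolkit.raster_processing.windowing import extract_window

with rasterio.open("input.tif") as raster:
    out_image, out_meta = extract_window(raster=raster, center_coords=(384744.0, 6671384.0), height=32, width=32)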
    "},{"location":"training_data_tools/class_balancing/","title":"Class balancing","text":""},{"location":"training_data_tools/class_balancing/#eis_toolkit.training_data_tools.class_balancing.balance_SMOTETomek","title":"balance_SMOTETomek(X, y, sampling_strategy='auto', random_state=None)","text":"

Balances the classes of the input dataset using the SMOTETomek resampling method.

Parameters:
    X (Union[DataFrame, ndarray]): The feature matrix (input data as a DataFrame). Required.
    y (Union[Series, ndarray]): The target labels corresponding to the feature matrix. Required.
    sampling_strategy (Union[float, str, dict]): Parameter controlling how to perform the resampling. If float, specifies the ratio of samples in the minority class to samples of the majority class; if str, specifies the classes to be resampled ("minority", "not minority", "not majority", "all", "auto"); if dict, the keys should be the targeted classes and the values the desired number of samples for the class. Defaults to "auto", which will resample all classes except the majority class.
    random_state (Optional[int]): Parameter controlling randomization of the algorithm. Can be given a seed (number). Defaults to None, which randomizes the seed.

Returns:
    tuple[Union[DataFrame, ndarray], Union[Series, ndarray]]: Resampled feature matrix and target labels.

Raises:
    NonMatchingParameterLengthsException: If X and y have different lengths.

    Source code in eis_toolkit/training_data_tools/class_balancing.py
    @beartype\ndef balance_SMOTETomek(\n    X: Union[pd.DataFrame, np.ndarray],\n    y: Union[pd.Series, np.ndarray],\n    sampling_strategy: Union[float, str, dict] = \"auto\",\n    random_state: Optional[int] = None,\n) -> tuple[Union[pd.DataFrame, np.ndarray], Union[pd.Series, np.ndarray]]:\n    \"\"\"Balances the classes of input dataset using SMOTETomek resampling method.\n\n    Args:\n        X: The feature matrix (input data as a DataFrame).\n        y: The target labels corresponding to the feature matrix.\n        sampling_strategy: Parameter controlling how to perform the resampling.\n            If float, specifies the ratio of samples in minority class to samples of majority class,\n            if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"),\n            if dict, the keys should be targeted classes and values the desired number of samples for the class.\n            Defaults to \"auto\", which will resample all classes except the majority class.\n        random_state: Parameter controlling randomization of the algorithm. Can be given a seed (number).\n            Defaults to None, which randomizes the seed.\n\n    Returns:\n        Resampled feature matrix and target labels.\n\n    Raises:\n        NonMatchingParameterLengthsException: If X and y have different length.\n    \"\"\"\n\n    if len(X) != len(y):\n        raise exceptions.NonMatchingParameterLengthsException(\n            \"Feature matrix X and target labels y must have the same length.\"\n        )\n\n    X_res, y_res = SMOTETomek(sampling_strategy=sampling_strategy, random_state=random_state).fit_resample(X, y)\n    return X_res, y_res\n
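A minimal usage sketch with an invented imbalanced dataset (90 negatives, 10 positives):

import numpy as np

from eis_toolkit.training_data_tools.class_balancing import balance_SMOTETomek

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = np.array([0] * 90 + [1] * 10)

X_res, y_res = balance_SMOTETomek(X, y, sampling_strategy="auto", random_state=0)
print(np.bincount(y_res))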
    "},{"location":"transformations/binarize/","title":"Binarize","text":""},{"location":"transformations/binarize/#eis_toolkit.transformations.binarize.binarize","title":"binarize(raster, bands=None, thresholds=[Number], nodata=None)","text":"

    Binarize data based on a given threshold.

Replaces values less than or equal to the threshold with 0. Replaces values greater than the threshold with 1.

Takes one nodata value, which will be re-written after the transformation.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied to all bands. The threshold can be set for each band individually.

Parameters:
    raster (DatasetReader): Data object to be transformed. Required.
    bands (Optional[Sequence[int]]): Selection of bands to be transformed. Defaults to None.
    thresholds (Sequence[Number]): Threshold values for the transformation. Defaults to [Number].
    nodata (Optional[Number]): Nodata value to be considered. Defaults to None.

Returns:
    out_array (ndarray): The transformed data.
    out_meta (dict): Updated metadata.
    out_settings (dict): Log of input settings and calculated statistics if available.

Raises:
    InvalidRasterBandException: The input contains invalid band numbers.
    NonMatchingParameterLengthsException: The input does not match the number of selected bands.

    Source code in eis_toolkit/transformations/binarize.py
    @beartype\ndef binarize(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    thresholds: Sequence[Number] = [Number],\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Binarize data based on a given threshold.\n\n    Replaces values less or equal threshold with 0.\n    Replaces values greater than the threshold with 1.\n\n    Takes one nodata value which will be re-written after transformation.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The threshold can be set for each band individually.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        thresholds: Threshold values for transformation.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = cast_scalar_to_int(raster.nodata if nodata is None else nodata)\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection.\")\n\n    if check_parameter_length(bands, thresholds) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid threshold length.\")\n\n    expanded_args = expand_and_zip(bands, thresholds)\n    thresholds = [element[1] for element in expanded_args]\n\n    out_settings = {}\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        inital_dtype = band_array.dtype\n\n        band_mask = np.isin(band_array, nodata)\n        band_array = _binarize(band_array, threshold=thresholds[i])\n        band_array = np.where(band_mask, nodata, band_array)\n\n        if not check_dtype_for_int(nodata):\n            band_array = band_array.astype(inital_dtype)\n        else:\n            band_array = band_array.astype(np.min_scalar_type(nodata))\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"threshold\": thresholds[i],\n            \"nodata\": nodata,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
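A minimal usage sketch (file path and threshold are hypothetical; a single threshold is applied to all bands):

import rasterio

from eis_toolkit.transformations.binarize import binarize

with rasterio.open("input.tif") as raster:
    out_array, out_meta, out_settings = binarize(raster=raster, thresholds=[0.5])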
    "},{"location":"transformations/clip/","title":"Clip","text":""},{"location":"transformations/clip/#eis_toolkit.transformations.clip.clip_transform","title":"clip_transform(raster, limits, bands=None, nodata=None)","text":"

    Clips data based on specified upper and lower limits.

Takes one nodata value that will be ignored in calculations. Values below the lower limit and above the upper limit are replaced with the respective limit values. Works both one-sided and two-sided, but raises an error if no limits are provided.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The limits can be set for each band individually.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| limits | Sequence[Tuple[Optional[Number], Optional[Number]]] | Lower and upper limits (lower, upper) as real values. | required |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |
| InvalidParameterValueException | The input does not match the requirements (values, order of values). |

    Source code in eis_toolkit/transformations/clip.py
```python
@beartype
def clip_transform(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    limits: Sequence[Tuple[Optional[Number], Optional[Number]]],
    bands: Optional[Sequence[int]] = None,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Clips data based on specified upper and lower limits.

    Takes one nodata value that will be ignored in calculations.
    Values below the lower limit and above the upper limit are replaced with the respective limit values.
    Works both one-sided and two-sided, but raises an error if no limits are provided.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only one entry, it will be applied to all bands.
    The limits can be set for each band individually.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        limits: Lower and upper limits (lower, upper) as real values.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    if check_parameter_length(bands, limits) is False:
        raise NonMatchingParameterLengthsException("Invalid limit length.")

    for item in limits:
        if item.count(None) == len(item):
            raise InvalidParameterValueException(f"Limit values all None: {item}.")

        if not check_minmax_position(item):
            raise InvalidParameterValueException(f"Invalid min-max values provided: {item}.")

    expanded_args = expand_and_zip(bands, limits)
    limits = [element[1] for element in expanded_args]

    out_settings = {}

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        initial_dtype = band_array.dtype

        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = nodata_to_nan(band_array, nodata_value=nodata)

        band_array = _clip_transform(band_array, limits=limits[i])

        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=initial_dtype)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "limit_lower": cast_scalar_to_int(limits[i][0]),
            "limit_upper": cast_scalar_to_int(limits[i][1]),
            "nodata": cast_scalar_to_int(nodata),
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
    "},{"location":"transformations/linear/","title":"Linear","text":""},{"location":"transformations/linear/#eis_toolkit.transformations.linear.min_max_scaling","title":"min_max_scaling(raster, bands=None, new_range=[(0, 1)], nodata=None)","text":"

    Normalize data based on a specified new range.

    Uses the provided new minimum and maximum to transform data into the new interval. Takes one nodata value that will be ignored in calculations.

If no band/column selection is specified, all bands/columns will be used. The new_range can be set for each band individually. If a parameter contains only one entry, it will be applied to all bands.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| new_range | Sequence[Tuple[Number, Number]] | The new interval data will be transformed into. First value corresponds to min, second to max. | [(0, 1)] |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |
| InvalidParameterValueException | The input does not match the requirements (values, order of values). |

    Source code in eis_toolkit/transformations/linear.py
```python
@beartype
def min_max_scaling(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    new_range: Sequence[Tuple[Number, Number]] = [(0, 1)],
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Normalize data based on a specified new range.

    Uses the provided new minimum and maximum to transform data into the new interval.
    Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used.
    The new_range can be set for each band individually.
    If a parameter contains only one entry, it will be applied to all bands.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        new_range: The new interval data will be transformed into. First value corresponds to min, second to max.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    if check_parameter_length(bands, new_range) is False:
        raise NonMatchingParameterLengthsException("Invalid new_range length")

    for item in new_range:
        if not check_minmax_position(item):
            raise InvalidParameterValueException(f"Invalid min-max values provided: {item}")

    expanded_args = expand_and_zip(bands, new_range)
    new_range = [element[1] for element in expanded_args]

    out_settings = {}
    out_decimals = set_max_precision()

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)

        band_array = _min_max_scaling(band_array.astype(np.float64), new_range=new_range[i])

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "scaled_min": new_range[i][0],
            "scaled_max": new_range[i][1],
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
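min_max_scaling appears to follow the standard min-max formula X' = new_min + (X - X_min) * (new_max - new_min) / (X_max - X_min); a sketch under that assumption (the private _min_max_scaling may differ in edge-case handling):

```python
import numpy as np

def min_max_scale(x: np.ndarray, new_range=(0.0, 1.0)) -> np.ndarray:
    # nan-aware so that nodata values already mapped to NaN are ignored.
    new_min, new_max = new_range
    x_min, x_max = np.nanmin(x), np.nanmax(x)
    return new_min + (x - x_min) * (new_max - new_min) / (x_max - x_min)

print(min_max_scale(np.array([10.0, 15.0, 20.0])))  # [0.  0.5 1. ]
```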
    "},{"location":"transformations/linear/#eis_toolkit.transformations.linear.z_score_normalization","title":"z_score_normalization(raster, bands=None, nodata=None)","text":"

    Normalize data based on mean and standard deviation.

    Results will have a mean = 0 and standard deviation = 1. Takes one nodata value that will be ignored in calculations.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |

    Source code in eis_toolkit/transformations/linear.py
```python
@beartype
def z_score_normalization(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Normalize data based on mean and standard deviation.

    Results will have a mean = 0 and standard deviation = 1.
    Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only one entry, it will be applied to all bands.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection.")

    out_settings = {}
    out_decimals = set_max_precision()

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)

        band_array, mean_array, sd_array = _z_score_normalization(band_array.astype(np.float64))

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "original_mean": truncate_decimal_places(mean_array, decimal_places=out_decimals),
            "original_sd": truncate_decimal_places(sd_array, decimal_places=out_decimals),
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
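The underlying computation is the standard z-score; a minimal nan-aware sketch (the private _z_score_normalization also returns the mean and standard deviation it used):

```python
import numpy as np

def z_score(x: np.ndarray) -> np.ndarray:
    # Subtract the mean and divide by the standard deviation, ignoring NaN.
    return (x - np.nanmean(x)) / np.nanstd(x)

scaled = z_score(np.array([1.0, 2.0, 3.0, 4.0]))
print(round(scaled.mean(), 2), round(scaled.std(), 2))  # 0.0 1.0
```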
    "},{"location":"transformations/logarithmic/","title":"Logarithmic","text":""},{"location":"transformations/logarithmic/#eis_toolkit.transformations.logarithmic.log_transform","title":"log_transform(raster, bands=None, log_transform=['log2'], nodata=None)","text":"

    Perform a logarithmic transformation on the provided data.

Takes one nodata value that will be ignored in calculations. Values less than or equal to zero are not transformed and are replaced with the nodata value.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The log_transform can be set for each band individually.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| log_transform | Sequence[str] | The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'. | ['log2'] |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |
| InvalidParameterValueException | The input does not match the requirements (values, order of values). |

    Source code in eis_toolkit/transformations/logarithmic.py
```python
@beartype
def log_transform(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    log_transform: Sequence[str] = ["log2"],
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Perform a logarithmic transformation on the provided data.

    Takes one nodata value that will be ignored in calculations.
    Values less than or equal to zero are not transformed and are replaced with the nodata value.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only one entry, it will be applied to all bands.
    The log_transform can be set for each band individually.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        log_transform: The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    if check_parameter_length(bands, log_transform) is False:
        raise NonMatchingParameterLengthsException("Invalid length for log-base values.")

    for item in log_transform:
        if not (item == "ln" or item == "log2" or item == "log10"):
            raise InvalidParameterValueException(f"Invalid method: {item}.")

    expanded_args = expand_and_zip(bands, log_transform)
    log_transform = [element[1] for element in expanded_args]

    out_settings = {}
    out_decimals = set_max_precision()

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)
        band_array[band_array <= 0] = np.nan

        if log_transform[i] == "ln":
            band_array = _log_transform_ln(band_array.astype(np.float64))
        elif log_transform[i] == "log2":
            band_array = _log_transform_log2(band_array.astype(np.float64))
        elif log_transform[i] == "log10":
            band_array = _log_transform_log10(band_array.astype(np.float64))

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "log_transform": log_transform[i],
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
    "},{"location":"transformations/sigmoid/","title":"Sigmoid","text":""},{"location":"transformations/sigmoid/#eis_toolkit.transformations.sigmoid.sigmoid_transform","title":"sigmoid_transform(raster, bands=None, bounds=[(0, 1)], slope=[1], center=True, nodata=None)","text":"

    Transform data into a sigmoid-shape based on a specified new range.

    Uses the provided new minimum and maximum, shift and slope parameters to transform the data. Takes one nodata value that will be ignored in calculations.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The bounds and slope values can be set for each band individually.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| bounds | Sequence[Tuple[Number, Number]] | Boundaries for the calculation of the sigmoid function (lower, upper). | [(0, 1)] |
| slope | Sequence[Number] | Value which modifies the slope of the resulting sigmoid-curve. | [1] |
| center | bool | Center array values around mean = 0 before sigmoid transformation. | True |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |
| InvalidParameterValueException | The input does not match the requirements (values, order of values). |

    Source code in eis_toolkit/transformations/sigmoid.py
```python
@beartype
def sigmoid_transform(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    bounds: Sequence[Tuple[Number, Number]] = [(0, 1)],
    slope: Sequence[Number] = [1],
    center: bool = True,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Transform data into a sigmoid-shape based on a specified new range.

    Uses the provided new minimum and maximum, shift and slope parameters to transform the data.
    Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only one entry, it will be applied to all bands.
    The bounds and slope values can be set for each band individually.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        bounds: Boundaries for the calculation of the sigmoid function (lower, upper).
        slope: Value which modifies the slope of the resulting sigmoid-curve.
        center: Center array values around mean = 0 before sigmoid transformation.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    for parameter_name, parameter in [("bounds", bounds), ("slope", slope)]:
        if check_parameter_length(bands, parameter) is False:
            raise NonMatchingParameterLengthsException(f"Invalid length for {parameter_name}.")

    for item in bounds:
        if check_minmax_position(item) is False:
            raise InvalidParameterValueException(f"Invalid min-max values provided: {item}.")

    expanded_args = expand_and_zip(bands, bounds, slope)
    bounds = [element[1] for element in expanded_args]
    slope = [element[2] for element in expanded_args]

    out_settings = {}
    out_decimals = set_max_precision()

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)

        band_array = _sigmoid_transform(band_array.astype(np.float64), bounds=bounds[i], slope=slope[i], center=center)

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "bound_lower": truncate_decimal_places(bounds[i][0], decimal_places=out_decimals),
            "bound_upper": truncate_decimal_places(bounds[i][1], decimal_places=out_decimals),
            "slope": slope[i],
            "center": center,
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
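The exact internals live in the private _sigmoid_transform, but a standard scaled logistic function matches the described behaviour; a sketch under that assumption:

```python
import numpy as np

def sigmoid(x: np.ndarray, bounds=(0.0, 1.0), slope=1.0, center=True) -> np.ndarray:
    # Optionally center around mean = 0, then squeeze into (lower, upper).
    if center:
        x = x - np.nanmean(x)
    lower, upper = bounds
    return lower + (upper - lower) / (1 + np.exp(-slope * x))

print(sigmoid(np.array([-2.0, 0.0, 2.0])))  # approx. [0.12 0.5  0.88]
```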
    "},{"location":"transformations/winsorize/","title":"Winsorize","text":""},{"location":"transformations/winsorize/#eis_toolkit.transformations.winsorize.winsorize","title":"winsorize(raster, percentiles, bands=None, inside=False, nodata=None)","text":"

    Winsorize data based on specified percentile values.

Takes one nodata value that will be ignored in calculations. Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided. Works both one-sided and two-sided, but raises an error if no percentile values are provided.

Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%] and percentile_upper = 10 corresponds to the interval [90%, max]. I.e. percentile_lower = 0 refers to the data minimum and percentile_upper = 0 to the data maximum.

Calculation of percentiles is ambiguous. Users can choose whether to use the value for replacement from inside or outside of the respective interval. Example: given the array [5 10 12 15 20 24 27 30 35] and percentiles (10, 10), the calculated percentiles are (5, 35) for inside and (10, 30) for outside. This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The percentiles can be set for each band individually, but the inside parameter is the same for all bands.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Data object to be transformed. | required |
| bands | Optional[Sequence[int]] | Selection of bands to be transformed. | None |
| percentiles | Sequence[Tuple[Optional[Number], Optional[Number]]] | Lower and upper percentile values (lower, upper) between [0, 100]. | required |
| inside | bool | Whether to use the value for replacement from the left or right of the calculated percentile. | False |
| nodata | Optional[Number] | Nodata value to be considered. | None |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| out_array | ndarray | The transformed data. |
| out_meta | dict | Updated metadata. |
| out_settings | dict | Log of input settings and calculated statistics if available. |

Raises:

| Type | Description |
| --- | --- |
| InvalidRasterBandException | The input contains invalid band numbers. |
| NonMatchingParameterLengthsException | The input does not match the number of selected bands. |
| InvalidParameterValueException | The input does not match the requirements (values, order of values). |

    Source code in eis_toolkit/transformations/winsorize.py
```python
@beartype
def winsorize(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    percentiles: Sequence[Tuple[Optional[Number], Optional[Number]]],
    bands: Optional[Sequence[int]] = None,
    inside: bool = False,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Winsorize data based on specified percentile values.

    Takes one nodata value that will be ignored in calculations.
    Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided.
    Works both one-sided and two-sided, but raises an error if no percentile values are provided.

    Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%]
    and percentile_upper = 10 corresponds to the interval [90%, max].
    I.e. percentile_lower = 0 refers to the data minimum and percentile_upper = 0 to the data maximum.

    Calculation of percentiles is ambiguous. Users can choose whether to use the value
    for replacement from inside or outside of the respective interval. Example:
    Given the array [5 10 12 15 20 24 27 30 35] and percentiles (10, 10), the calculated
    percentiles are (5, 35) for inside and (10, 30) for outside.
    This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only one entry, it will be applied to all bands.
    The percentiles can be set for each band individually, but the inside parameter is the same for all bands.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        percentiles: Lower and upper percentile values (lower, upper) between [0, 100].
        inside: Whether to use the value for replacement from the left or right of the calculated percentile.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data.
        out_meta: Updated metadata.
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    if check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    if check_parameter_length(bands, percentiles) is False:
        raise NonMatchingParameterLengthsException("Invalid length for percentiles.")

    for item in percentiles:
        if item.count(None) == len(item):
            raise InvalidParameterValueException(f"Percentile values all None: {item}.")

        if None not in item and sum(item) >= 100:
            raise InvalidParameterValueException(f"Sum >= 100: {item}.")

        if item[0] is not None and not (0 < item[0] < 100):
            raise InvalidParameterValueException(f"Invalid lower percentile value: {item}.")

        if item[1] is not None and not (0 < item[1] < 100):
            raise InvalidParameterValueException(f"Invalid upper percentile value: {item}.")

    expanded_args = expand_and_zip(bands, percentiles)
    percentiles = [element[1] for element in expanded_args]

    out_settings = {}

    for i in range(0, len(bands)):
        band_array = raster.read(bands[i])
        initial_dtype = band_array.dtype

        band_array = cast_array_to_float(band_array, cast_int=True)
        band_array = nodata_to_nan(band_array, nodata_value=nodata)

        band_array, calculated_lower, calculated_upper = _winsorize(
            band_array, percentiles=percentiles[i], inside=inside
        )

        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=initial_dtype)

        band_array = np.expand_dims(band_array, axis=0)

        if i == 0:
            out_array = band_array.copy()
        else:
            out_array = np.vstack((out_array, band_array))

        current_transform = f"transformation {i + 1}"
        current_settings = {
            "band_origin": bands[i],
            "percentile_lower": cast_scalar_to_int(percentiles[i][0]),
            "percentile_upper": cast_scalar_to_int(percentiles[i][1]),
            "calculated_lower": cast_scalar_to_int(calculated_lower),
            "calculated_upper": cast_scalar_to_int(calculated_upper),
            "nodata": cast_scalar_to_int(nodata),
        }

        out_settings[current_transform] = current_settings

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings
```
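The documented example can be reproduced with plain numpy, assuming the inside/outside choice corresponds to numpy's 'lower'/'higher' percentile methods (numpy >= 1.22):

```python
import numpy as np

data = np.array([5, 10, 12, 15, 20, 24, 27, 30, 35])

# percentiles (10, 10): replacement values taken from inside vs. outside the intervals.
inside = (np.percentile(data, 10, method="lower"), np.percentile(data, 90, method="higher"))
outside = (np.percentile(data, 10, method="higher"), np.percentile(data, 90, method="lower"))

print(inside, np.clip(data, *inside))    # (5.0, 35.0) -> data unchanged
print(outside, np.clip(data, *outside))  # (10.0, 30.0) -> ends replaced
```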
    "},{"location":"validation/calculate_auc/","title":"Calculate AUC","text":""},{"location":"validation/calculate_auc/#eis_toolkit.validation.calculate_auc.calculate_auc","title":"calculate_auc(x_values, y_values)","text":"

    Calculate area under curve (AUC).

Calculates AUC for a curve. The x-axis should be either proportion of area or false positive rate. The y-axis should always be true positive rate. AUC is calculated with sklearn.metrics.auc, which uses the trapezoidal rule.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| x_values | ndarray | Either proportion of area or false positive rate values. | required |
| y_values | ndarray | True positive rate values. | required |

Returns:

| Type | Description |
| --- | --- |
| float | The area under curve. |

Raises:

| Type | Description |
| --- | --- |
| InvalidParameterValueException | x_values or y_values are out of bounds. |

    Source code in eis_toolkit/validation/calculate_auc.py
```python
@beartype
def calculate_auc(x_values: np.ndarray, y_values: np.ndarray) -> float:
    """Calculate area under curve (AUC).

    Calculates AUC for a curve. The x-axis should be either proportion of area or false positive rate. The y-axis
    should always be true positive rate. AUC is calculated with sklearn.metrics.auc, which uses the trapezoidal rule.

    Args:
        x_values: Either proportion of area or false positive rate values.
        y_values: True positive rate values.

    Returns:
        The area under curve.

    Raises:
        InvalidParameterValueException: x_values or y_values are out of bounds.
    """
    if x_values.max() > 1 or x_values.min() < 0:
        raise InvalidParameterValueException("x_values should be within range 0-1")

    if y_values.max() > 1 or y_values.min() < 0:
        raise InvalidParameterValueException("y_values should be within range 0-1")

    auc_value = _calculate_auc(x_values=x_values, y_values=y_values)
    return auc_value
```
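A quick worked example of the same trapezoidal-rule computation via sklearn.metrics.auc:

```python
import numpy as np
from sklearn.metrics import auc

# Proportion-of-area values (x) with matching true positive rates (y).
x_values = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
y_values = np.array([0.0, 0.6, 0.8, 0.9, 1.0])

print(auc(x_values, y_values))  # 0.7 by the trapezoidal rule
```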
    "},{"location":"validation/calculate_base_metrics/","title":"Calculate base metrics","text":""},{"location":"validation/calculate_base_metrics/#eis_toolkit.validation.calculate_base_metrics.calculate_base_metrics","title":"calculate_base_metrics(raster, deposits, band=1, negatives=None)","text":"

    Calculate true positive rate, proportion of area and false positive rate values for different thresholds.

The function calculates true positive rate, proportion of area and false positive rate values for different thresholds, which are determined from the input deposit locations and mineral prospectivity map. Note that calculation of false positive rate is optional and is only done if negative point locations are provided.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| raster | DatasetReader | Mineral prospectivity map or evidence layer. | required |
| deposits | GeoDataFrame | Mineral deposit locations as points. | required |
| band | int | Band index of the mineral prospectivity map. Defaults to 1. | 1 |
| negatives | Optional[GeoDataFrame] | Negative locations as points. | None |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | DataFrame containing true positive rate, proportion of area, threshold values and false positive rate (optional) values. |

Raises:

| Type | Description |
| --- | --- |
| NonMatchingCrsException | The raster and point data are not in the same CRS. |
| NotApplicableGeometryTypeException | The input geometries contain non-point features. |

    Source code in eis_toolkit/validation/calculate_base_metrics.py
```python
@beartype
def calculate_base_metrics(
    raster: rasterio.io.DatasetReader,
    deposits: geopandas.GeoDataFrame,
    band: int = 1,
    negatives: Optional[geopandas.GeoDataFrame] = None,
) -> pd.DataFrame:
    """Calculate true positive rate, proportion of area and false positive rate values for different thresholds.

    The function calculates true positive rate, proportion of area and false positive rate values for different
    thresholds, which are determined from the input deposit locations and mineral prospectivity map. Note that
    calculation of false positive rate is optional and is only done if negative point locations are provided.

    Args:
        raster: Mineral prospectivity map or evidence layer.
        deposits: Mineral deposit locations as points.
        band: Band index of the mineral prospectivity map. Defaults to 1.
        negatives: Negative locations as points.

    Returns:
        DataFrame containing true positive rate, proportion of area, threshold values and false positive
            rate (optional) values.

    Raises:
        NonMatchingCrsException: The raster and point data are not in the same CRS.
        NotApplicableGeometryTypeException: The input geometries contain non-point features.
    """
    if negatives is not None:
        geometries = pd.concat([deposits, negatives]).geometry
    else:
        geometries = deposits["geometry"]

    if not check_matching_crs(
        objects=[raster, geometries],
    ):
        raise NonMatchingCrsException("The raster and deposits are not in the same CRS.")

    if not check_geometry_types(
        geometries=geometries,
        allowed_types=["Point"],
    ):
        raise NotApplicableGeometryTypeException("The input geometries contain non-point features.")

    base_metrics = _calculate_base_metrics(raster=raster, deposits=deposits, band=band, negatives=negatives)

    return base_metrics
```
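A minimal usage sketch; the file names are hypothetical, and the raster and points must share a CRS:

```python
import geopandas as gpd
import rasterio

from eis_toolkit.validation.calculate_base_metrics import calculate_base_metrics

deposits = gpd.read_file("data/deposits.gpkg")  # point geometries

with rasterio.open("data/prospectivity_map.tif") as raster:
    metrics = calculate_base_metrics(raster=raster, deposits=deposits)

print(metrics.head())
```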
    "},{"location":"validation/get_pa_intersection/","title":"Get P-A plot intersection point","text":""},{"location":"validation/get_pa_intersection/#eis_toolkit.validation.get_pa_intersection.get_pa_intersection","title":"get_pa_intersection(true_positive_rate_values, proportion_of_area_values, threshold_values)","text":"

Calculate the intersection point for the prediction rate and area curves in the prediction-area (P-A) plot.

Threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate for the y-axis, while the area curve uses inverted proportion of area as the y-axis.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| true_positive_rate_values | ndarray | True positive rate values, values should be within range 0-1. | required |
| proportion_of_area_values | ndarray | Proportion of area values, values should be within range 0-1. | required |
| threshold_values | ndarray | Threshold values that were used to calculate true positive rate and proportion of area. | required |

Returns:

| Type | Description |
| --- | --- |
| Tuple[float, float] | X and y coordinates of the intersection point. |

Raises:

| Type | Description |
| --- | --- |
| InvalidParameterValueException | true_positive_rate_values or proportion_of_area_values values are out of bounds. |

    Source code in eis_toolkit/validation/get_pa_intersection.py
```python
@beartype
def get_pa_intersection(
    true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray
) -> Tuple[float, float]:
    """Calculate the intersection point for the prediction rate and area curves in the P-A plot.

    Threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate for the
    y-axis, while the area curve uses inverted proportion of area as the y-axis.

    Args:
        true_positive_rate_values: True positive rate values, values should be within range 0-1.
        proportion_of_area_values: Proportion of area values, values should be within range 0-1.
        threshold_values: Threshold values that were used to calculate true positive rate and proportion of area.

    Returns:
        X and y coordinates of the intersection point.

    Raises:
        InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.
    """
    if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:
        raise InvalidParameterValueException("true_positive_rate_values values should be within range 0-1")

    if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:
        raise InvalidParameterValueException("proportion_of_area_values values should be within range 0-1")

    intersection = _get_pa_intersection(
        true_positive_rate_values=true_positive_rate_values,
        proportion_of_area_values=proportion_of_area_values,
        threshold_values=threshold_values,
    )

    return intersection.x, intersection.y
```
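Conceptually, the intersection is where the prediction rate curve crosses the inverted area curve; a toy sketch with synthetic curves:

```python
import numpy as np

thresholds = np.linspace(0, 1, 1001)
tpr = 1 - thresholds        # toy prediction rate: falls as the threshold rises
area = 1 - thresholds ** 2  # toy proportion of area: also falls with the threshold

# The area curve is plotted inverted, so the crossing solves tpr == 1 - area.
idx = np.argmin(np.abs(tpr - (1 - area)))
print(round(thresholds[idx], 3), round(tpr[idx], 3))  # ~0.618 ~0.382
```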
    "},{"location":"validation/plot_correlation_matrix/","title":"Plot correlation matrix","text":""},{"location":"validation/plot_correlation_matrix/#eis_toolkit.validation.plot_correlation_matrix.plot_correlation_matrix","title":"plot_correlation_matrix(matrix, annotate=True, cmap=None, plot_title=None, **kwargs)","text":"

    Create a Seaborn heatmap to visualize correlation matrix.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| matrix | DataFrame | Correlation matrix as a DataFrame. | required |
| annotate | bool | If plot squares should display the correlation values. Defaults to True. | True |
| cmap | Optional[ListedColormap] | Colormap for plotting. Optional parameter. Defaults to None, in which case a default colormap is used. | None |
| plot_title | Optional[str] | Title of the plot. Optional parameter, defaults to None (no title). | None |
| **kwargs | dict | Additional parameters to pass to Seaborn and matplotlib. | {} |

Returns:

| Type | Description |
| --- | --- |
| Axes | Matplotlib axes object with the produced plot. |

Raises:

| Type | Description |
| --- | --- |
| EmptyDataFrameException | Input matrix is empty. |

    Source code in eis_toolkit/validation/plot_correlation_matrix.py
```python
def plot_correlation_matrix(
    matrix: pd.DataFrame,
    annotate: bool = True,
    cmap: Optional[matplotlib.colors.ListedColormap] = None,
    plot_title: Optional[str] = None,
    **kwargs: dict
) -> matplotlib.axes.Axes:
    """
    Create a Seaborn heatmap to visualize correlation matrix.

    Args:
        matrix: Correlation matrix as a DataFrame.
        annotate: If plot squares should display the correlation values. Defaults to True.
        cmap: Colormap for plotting. Optional parameter. Defaults to None, in which
            case a default colormap is used.
        plot_title: Title of the plot. Optional parameter, defaults to None (no title).
        **kwargs: Additional parameters to pass to Seaborn and matplotlib.

    Returns:
        Matplotlib axes object with the produced plot.

    Raises:
        EmptyDataFrameException: Input matrix is empty.
    """
    if matrix.empty:
        raise exceptions.EmptyDataFrameException("Input matrix DataFrame is empty.")

    # Mask for the upper triangle of the heatmap
    mask = np.triu(np.ones_like(matrix, dtype=bool))

    if cmap is None:
        # Generate a default diverging colormap
        cmap = sns.diverging_palette(230, 20, as_cmap=True)

    ax = sns.heatmap(
        matrix,
        mask=mask,
        cmap=cmap,
        vmax=0.3,
        center=0,
        square=True,
        linewidths=0.5,
        annot=annotate,
        cbar_kws={"shrink": 0.5},
        **kwargs
    )
    if plot_title is not None:
        ax.set_title(plot_title)

    return ax
```
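A minimal usage sketch with a small, made-up DataFrame:

```python
import matplotlib.pyplot as plt
import pandas as pd

from eis_toolkit.validation.plot_correlation_matrix import plot_correlation_matrix

# Hypothetical geochemical measurements.
df = pd.DataFrame(
    {"cu": [1.0, 2.0, 3.0, 4.0], "zn": [2.0, 1.5, 3.5, 3.0], "pb": [0.5, 1.0, 0.8, 1.2]}
)

ax = plot_correlation_matrix(df.corr(), plot_title="Element correlations")
plt.show()
```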
    "},{"location":"validation/plot_prediction_area_curves/","title":"Plot prediction-area (P-A) curves","text":""},{"location":"validation/plot_prediction_area_curves/#eis_toolkit.validation.plot_prediction_area_curves.plot_prediction_area_curves","title":"plot_prediction_area_curves(true_positive_rate_values, proportion_of_area_values, threshold_values)","text":"

    Plot prediction-area (P-A) plot.

Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See e.g. Yousefi and Carranza (2015).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| true_positive_rate_values | ndarray | True positive rate values. | required |
| proportion_of_area_values | ndarray | Proportion of area values. | required |
| threshold_values | ndarray | Threshold values. | required |

Returns:

| Type | Description |
| --- | --- |
| Figure | P-A plot figure object. |

Raises:

| Type | Description |
| --- | --- |
| InvalidParameterValueException | true_positive_rate_values or proportion_of_area_values values are out of bounds. |

References:

Yousefi, Mahyar, and Emmanuel John M. Carranza. "Fuzzification of continuous-value spatial evidence for mineral prospectivity mapping." Computers & Geosciences 74 (2015): 97-109.

    Source code in eis_toolkit/validation/plot_prediction_area_curves.py
```python
@beartype
def plot_prediction_area_curves(
    true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray
) -> matplotlib.figure.Figure:
    """Plot prediction-area (P-A) plot.

    Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers.
    See e.g. Yousefi and Carranza (2015).

    Args:
        true_positive_rate_values: True positive rate values.
        proportion_of_area_values: Proportion of area values.
        threshold_values: Threshold values.

    Returns:
        P-A plot figure object.

    Raises:
        InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.

    References:
        Yousefi, Mahyar, and Emmanuel John M. Carranza. "Fuzzification of continuous-value spatial evidence for mineral
        prospectivity mapping." Computers & Geosciences 74 (2015): 97-109.
    """
    if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:
        raise InvalidParameterValueException("true_positive_rate values should be within range 0-1")

    if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:
        raise InvalidParameterValueException("proportion_of_area values should be within range 0-1")

    fig = _plot_prediction_area_curves(
        true_positive_rate_values=true_positive_rate_values,
        proportion_of_area_values=proportion_of_area_values,
        threshold_values=threshold_values,
    )
    return fig
```
    "},{"location":"validation/plot_rate_curve/","title":"Plot rate curve","text":""},{"location":"validation/plot_rate_curve/#eis_toolkit.validation.plot_rate_curve.plot_rate_curve","title":"plot_rate_curve(x_values, y_values, plot_type='success_rate')","text":"

    Plot success rate, prediction rate or ROC curve.

    Plot type depends on plot_type argument. Y-axis is always true positive rate, while x-axis can be either false positive rate (roc) or proportion of area (success and prediction rate) depending on plot type.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| x_values | ndarray | False positive rate values or proportion of area values. | required |
| y_values | ndarray | True positive rate values. | required |
| plot_type | str | Plot type. Can be either: "success_rate", "prediction_rate" or "roc". | 'success_rate' |

Returns:

| Type | Description |
| --- | --- |
| Figure | Success rate, prediction rate or ROC plot figure object. |

Raises:

| Type | Description |
| --- | --- |
| InvalidParameterValueException | Invalid plot type. |
| InvalidParameterValueException | x_values or y_values are out of bounds. |

    Source code in eis_toolkit/validation/plot_rate_curve.py
```python
@beartype
def plot_rate_curve(
    x_values: np.ndarray,
    y_values: np.ndarray,
    plot_type: str = "success_rate",
) -> matplotlib.figure.Figure:
    """Plot success rate, prediction rate or ROC curve.

    Plot type depends on plot_type argument. Y-axis is always true positive rate, while x-axis can be either false
    positive rate (roc) or proportion of area (success and prediction rate) depending on plot type.

    Args:
        x_values: False positive rate values or proportion of area values.
        y_values: True positive rate values.
        plot_type: Plot type. Can be either: "success_rate", "prediction_rate" or "roc".

    Returns:
        Success rate, prediction rate or ROC plot figure object.

    Raises:
        InvalidParameterValueException: Invalid plot type.
        InvalidParameterValueException: x_values or y_values are out of bounds.
    """
    if plot_type == "success_rate":
        label = "Success rate"
        xlab = "Proportion of area"
    elif plot_type == "prediction_rate":
        label = "Prediction rate"
        xlab = "Proportion of area"
    elif plot_type == "roc":
        label = "ROC"
        xlab = "False positive rate"
    else:
        raise InvalidParameterValueException("Invalid plot type")

    if x_values.max() > 1 or x_values.min() < 0:
        raise InvalidParameterValueException("x_values should be within range 0-1")

    if y_values.max() > 1 or y_values.min() < 0:
        raise InvalidParameterValueException("y_values should be within range 0-1")

    fig = _plot_rate_curve(x_values=x_values, y_values=y_values, label=label, xlab=xlab)

    return fig
```
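A minimal usage sketch with synthetic success-rate data:

```python
import matplotlib.pyplot as plt
import numpy as np

from eis_toolkit.validation.plot_rate_curve import plot_rate_curve

x_values = np.linspace(0, 1, 50)  # proportion of area
y_values = np.sqrt(x_values)      # a toy curve above the diagonal

fig = plot_rate_curve(x_values, y_values, plot_type="success_rate")
plt.show()
```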
    "},{"location":"vector_processing/cell_based_association/","title":"Cell-Based Association","text":""},{"location":"vector_processing/cell_based_association/#eis_toolkit.vector_processing.cell_based_association.cell_based_association","title":"cell_based_association(cell_size, geodata, output_path, column=None, subset_target_attribute_values=None, add_name=None, add_buffer=None)","text":"

    Creation of CBA matrix.

Initializes a CBA matrix from a vector file. The mesh is calculated according to the geometries contained in this file and the size of cells. Allows adding multiple vector datasets to the matrix, based on targeted shapes and/or attributes.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| cell_size | int | Size of the cells. | required |
| geodata | List[GeoDataFrame] | GeoDataFrame to create the CBA matrix. Additional GeoDataFrame(s) can be imputed to add to the CBA matrix. | required |
| output_path | str | Name of the saved .tif file. | required |
| column | Optional[List[str]] | Name of the column of interest. If no attribute is specified, an artificial attribute is created representing the presence or absence of the geometries of this file for each cell of the CBA grid. A categorical attribute will generate as many (binary) columns in the CBA matrix as values considered of interest (dummification). See parameter subset_target_attribute_values. Additional column(s) can be imputed for each added GeoDataFrame(s). | None |
| subset_target_attribute_values | Optional[List[Union[None, list, str]]] | List of values of interest of the target attribute, in case a categorical target attribute has been specified. Allows filtering a subset of relevant values. Additional values can be imputed for each added GeoDataFrame(s). | None |
| add_name | Optional[List[Union[str, None]]] | Name of the column(s) to add to the matrix. | None |
| add_buffer | Optional[List[Union[Number, bool]]] | Allow the use of a buffer around shapes before the intersection with CBA cells for the added GeoDataFrame(s). Minimizes border effects or allows increasing positive samples (i.e. cells with mineralization). The size of the buffer is computed using the CRS (if projected CRS in meters: value in meters). | None |

Returns:

| Type | Description |
| --- | --- |
| GeoDataFrame | The created CBA matrix. |

    Source code in eis_toolkit/vector_processing/cell_based_association.py
```python
@beartype
def cell_based_association(
    cell_size: int,
    geodata: List[gpd.GeoDataFrame],
    output_path: str,
    column: Optional[List[str]] = None,
    subset_target_attribute_values: Optional[List[Union[None, list, str]]] = None,
    add_name: Optional[List[Union[str, None]]] = None,
    add_buffer: Optional[List[Union[Number, bool]]] = None,
) -> gpd.GeoDataFrame:
    """Creation of CBA matrix.

    Initializes a CBA matrix from a vector file. The mesh is calculated
    according to the geometries contained in this file and the size of cells.
    Allows adding multiple vector datasets to the matrix, based on targeted
    shapes and/or attributes.

    Args:
        cell_size: Size of the cells.
        geodata: GeoDataFrame to create the CBA matrix. Additional
            GeoDataFrame(s) can be imputed to add to the CBA matrix.
        output_path: Name of the saved .tif file.
        column: Name of the column of interest. If no attribute is specified,
            an artificial attribute is created representing the presence
            or absence of the geometries of this file for each cell of the CBA
            grid. A categorical attribute will generate as many (binary) columns
            in the CBA matrix as values considered of interest (dummification).
            See parameter <subset_target_attribute_values>. Additional
            column(s) can be imputed for each added GeoDataFrame(s).
        subset_target_attribute_values: List of values of interest of the
            target attribute, in case a categorical target attribute has been
            specified. Allows filtering a subset of relevant values. Additional
            values can be imputed for each added GeoDataFrame(s).
        add_name: Name of the column(s) to add to the matrix.
        add_buffer: Allow the use of a buffer around shapes before the
            intersection with CBA cells for the added GeoDataFrame(s). Minimizes
            border effects or allows increasing positive samples (i.e. cells
            with mineralization). The size of the buffer is computed using the
            CRS (if projected CRS in meters: value in meters).

    Returns:
        The created CBA matrix.
    """

    # Swapping None to list values
    if column is None:
        column = [""]
    if add_buffer is None:
        add_buffer = [False]

    # Consistency checks on input data
    for frame in geodata:
        if frame.empty:
            raise exceptions.EmptyDataFrameException("The input GeoDataFrame is empty.")

    if cell_size <= 0:
        raise exceptions.InvalidParameterValueException("Expected cell size to be positive and non-zero.")

    add_buffer = [False if x == 0 else x for x in add_buffer]
    if any(num < 0 for num in add_buffer):
        raise exceptions.InvalidParameterValueException("Expected buffer value to be positive, null or False.")

    for i, name in enumerate(column):
        if column[i] == "":
            if subset_target_attribute_values[i] is not None:
                raise exceptions.InvalidParameterValueException("Can't use subset of values if no column is targeted.")
        elif column[i] not in geodata[i]:
            raise exceptions.InvalidColumnException("Targeted column not found in the GeoDataFrame.")

    for i, subset in enumerate(subset_target_attribute_values):
        if subset is not None:
            for value in subset:
                if value not in geodata[i][column[i]].unique():
                    raise exceptions.InvalidParameterValueException(
                        "Subset of value(s) not found in the targeted column."
                    )

    # Computation
    for i, data in enumerate(geodata):
        if i == 0:
            # Initialization of the CBA matrix
            grid, cba = _init_from_vector_data(cell_size, geodata[0], column[0], subset_target_attribute_values[0])
        else:
            # If necessary, adding data to matrix
            cba = _add_layer(
                cba,
                grid,
                geodata[i],
                column[i],
                subset_target_attribute_values[i],
                add_name[i - 1],
                add_buffer[i - 1],
            )

    # Export
    _to_raster(cba, output_path)

    return cba
```
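A usage sketch; the file names, the LITHOLOGY column and its values are hypothetical, and lengths must line up (one column/subset entry per GeoDataFrame, one add_name/add_buffer entry per added GeoDataFrame):

```python
import geopandas as gpd

from eis_toolkit.vector_processing.cell_based_association import cell_based_association

geology = gpd.read_file("data/geology.gpkg")    # hypothetical base layer
deposits = gpd.read_file("data/deposits.gpkg")  # hypothetical added layer

cba = cell_based_association(
    cell_size=1000,
    geodata=[geology, deposits],
    output_path="output/cba.tif",
    column=["LITHOLOGY", ""],  # "" = presence/absence for the added layer
    subset_target_attribute_values=[["granite", "gabbro"], None],
    add_name=["deposits"],
    add_buffer=[500],
)
```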
    "},{"location":"vector_processing/distance_computation/","title":"Distance computation","text":""},{"location":"vector_processing/distance_computation/#eis_toolkit.vector_processing.distance_computation.distance_computation","title":"distance_computation(raster_profile, geometries)","text":"

    Calculate distance from raster cell to nearest geometry.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| raster_profile | Union[Profile, dict] | The raster profile of the raster in which the distances to the nearest geometry are determined. | required |
| geometries | GeoDataFrame | The geometries to determine distance to. | required |

Returns:

| Type | Description |
| ---- | ----------- |
| ndarray | A 2D numpy array with the distances computed. |

    Source code in eis_toolkit/vector_processing/distance_computation.py
    @beartype\ndef distance_computation(raster_profile: Union[profiles.Profile, dict], geometries: gpd.GeoDataFrame) -> np.ndarray:\n    \"\"\"Calculate distance from raster cell to nearest geometry.\n\n    Args:\n        raster_profile: The raster profile of the raster in which the distances\n            to the nearest geometry are determined.\n        geometries: The geometries to determine distance to.\n\n    Returns:\n        A 2D numpy array with the distances computed.\n\n    \"\"\"\n    if raster_profile.get(\"crs\") != geometries.crs:\n        raise exceptions.NonMatchingCrsException(\"Expected coordinate systems to match between raster and geometries. \")\n    if geometries.shape[0] == 0:\n        raise exceptions.EmptyDataFrameException(\"Expected GeoDataFrame to not be empty.\")\n\n    raster_width = raster_profile.get(\"width\")\n    raster_height = raster_profile.get(\"height\")\n\n    if not isinstance(raster_width, int) or not isinstance(raster_height, int):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected raster_profile to contain integer width and height. {raster_profile}\"\n        )\n\n    raster_transform = raster_profile.get(\"transform\")\n\n    if not isinstance(raster_transform, transform.Affine):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected raster_profile to contain an affine transformation. {raster_profile}\"\n        )\n\n    return _distance_computation(\n        raster_width=raster_width, raster_height=raster_height, raster_transform=raster_transform, geometries=geometries\n    )\n
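A minimal usage sketch, assuming a base raster and a vector file in the same CRS (file names are hypothetical):

```python
import geopandas as gpd
import rasterio

from eis_toolkit.vector_processing.distance_computation import distance_computation

geometries = gpd.read_file("deposits.gpkg")       # hypothetical input
with rasterio.open("base_raster.tif") as raster:  # hypothetical input
    distances = distance_computation(raster.profile, geometries)

# distances has shape (height, width), one distance value per cell
```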
    "},{"location":"vector_processing/idw_interpolation/","title":"IDW","text":""},{"location":"vector_processing/idw_interpolation/#eis_toolkit.vector_processing.idw_interpolation.idw","title":"idw(geodataframe, target_column, resolution, extent=None, power=2)","text":"

    Calculate inverse distance weighted (IDW) interpolation.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| geodataframe | GeoDataFrame | The vector dataframe to be interpolated. | required |
| target_column | str | The column name with values for each geometry. | required |
| resolution | Tuple[Number, Number] | The resolution, i.e. cell size, of the output raster as (pixel_size_x, pixel_size_y). | required |
| extent | Optional[Tuple[Number, Number, Number, Number]] | The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data. | None |
| power | Number | The value for determining the rate at which the weights decrease. As power increases, the weights for distant points decrease rapidly. Defaults to 2. | 2 |

Returns:

| Type | Description |
| ---- | ----------- |
| Tuple[ndarray, dict] | Rasterized vector data and metadata. |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | The input GeoDataFrame is empty. |
| InvalidParameterValueException | Invalid resolution or target_column. |

    Source code in eis_toolkit/vector_processing/idw_interpolation.py
    @beartype\ndef idw(\n    geodataframe: gpd.GeoDataFrame,\n    target_column: str,\n    resolution: Tuple[Number, Number],\n    extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n    power: Number = 2,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Calculate inverse distance weighted (IDW) interpolation.\n\n    Args:\n        geodataframe: The vector dataframe to be interpolated.\n        target_column: The column name with values for each geometry.\n        resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n        extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n            If None, calculate extent from the input vector data.\n        power: The value for determining the rate at which the weights decrease.\n            As power increases, the weights for distant points decrease rapidly.\n            Defaults to 2.\n\n    Returns:\n        Rasterized vector data and metadata.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterValueException: Invalid resolution or target_column.\n    \"\"\"\n\n    if geodataframe.shape[0] == 0:\n        raise EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n    if target_column not in geodataframe.columns:\n        raise InvalidParameterValueException(\n            f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n        )\n\n    if resolution[0] <= 0 or resolution[1] <= 0:\n        raise InvalidParameterValueException(\"Expected height and width greater than zero.\")\n\n    interpolated_values, out_meta = _idw_interpolation(geodataframe, target_column, resolution, power, extent)\n\n    return interpolated_values, out_meta\n
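A minimal usage sketch; the file name and target column are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.vector_processing.idw_interpolation import idw

points = gpd.read_file("samples.gpkg")  # hypothetical point data

interpolated, out_meta = idw(
    geodataframe=points,
    target_column="Au_ppm",  # hypothetical column
    resolution=(25, 25),
    power=2,
)
```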
    "},{"location":"vector_processing/kriging_interpolation/","title":"Kriging interpolation","text":""},{"location":"vector_processing/kriging_interpolation/#eis_toolkit.vector_processing.kriging_interpolation.kriging","title":"kriging(data, target_column, resolution, extent=None, variogram_model='linear', coordinates_type='geographic', method='ordinary')","text":"

    Perform Kriging interpolation on the input data.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | GeoDataFrame | GeoDataFrame containing the input data. | required |
| target_column | str | The column name with values for each geometry. | required |
| resolution | Tuple[Number, Number] | The resolution, i.e. cell size, of the output raster as (pixel_size_x, pixel_size_y). | required |
| extent | Optional[Tuple[Number, Number, Number, Number]] | The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data. | None |
| variogram_model | Literal['linear', 'power', 'gaussian', 'spherical', 'exponential'] | Variogram model to be used. Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'. | 'linear' |
| coordinates_type | Literal['euclidean', 'geographic'] | Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic'). Used only in ordinary kriging. Defaults to 'geographic'. | 'geographic' |
| method | Literal['ordinary', 'universal'] | Ordinary or universal kriging. Defaults to 'ordinary'. | 'ordinary' |

Returns:

| Type | Description |
| ---- | ----------- |
| Tuple[ndarray, dict] | Grid containing the interpolated values and metadata. |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | The input GeoDataFrame is empty. |
| InvalidParameterValueException | Target column name is invalid or resolution is not greater than zero. |

    Source code in eis_toolkit/vector_processing/kriging_interpolation.py
    @beartype\ndef kriging(\n    data: gpd.GeoDataFrame,\n    target_column: str,\n    resolution: Tuple[Number, Number],\n    extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n    variogram_model: Literal[\"linear\", \"power\", \"gaussian\", \"spherical\", \"exponential\"] = \"linear\",\n    coordinates_type: Literal[\"euclidean\", \"geographic\"] = \"geographic\",\n    method: Literal[\"ordinary\", \"universal\"] = \"ordinary\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"\n    Perform Kriging interpolation on the input data.\n\n    Args:\n        data: GeoDataFrame containing the input data.\n        target_column: The column name with values for each geometry.\n        resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n        extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n            If None, calculate extent from the input vector data.\n        variogram_model: Variogram model to be used.\n            Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.\n        coordinates_type: Determines are coordinates on a plane ('euclidean') or a sphere ('geographic').\n            Used only in ordinary kriging. Defaults to 'geographic'.\n        method: Ordinary or universal kriging. Defaults to 'ordinary'.\n\n    Returns:\n        Grid containing the interpolated values and metadata.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterValueException: Target column name is invalid or resolution is not greater than zero.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if target_column not in data.columns:\n        raise InvalidParameterValueException(\n            f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n        )\n\n    if resolution[0] <= 0 or resolution[1] <= 0:\n        raise InvalidParameterValueException(\"The resolution must be greater than zero.\")\n\n    data_interpolated, out_meta = _kriging(\n        data, target_column, resolution, extent, variogram_model, coordinates_type, method\n    )\n\n    return data_interpolated, out_meta\n
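A minimal usage sketch; the file name, target column and model choices are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.vector_processing.kriging_interpolation import kriging

points = gpd.read_file("samples.gpkg")  # hypothetical point data

grid, out_meta = kriging(
    data=points,
    target_column="Au_ppm",        # hypothetical column
    resolution=(25, 25),
    variogram_model="spherical",
    coordinates_type="euclidean",  # projected coordinates
    method="ordinary",
)
```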
    "},{"location":"vector_processing/rasterize_vector/","title":"Rasterize vector","text":""},{"location":"vector_processing/rasterize_vector/#eis_toolkit.vector_processing.rasterize_vector.rasterize_vector","title":"rasterize_vector(geodataframe, resolution=None, value_column=None, default_value=1.0, fill_value=0.0, base_raster_profile=None, buffer_value=None, merge_strategy='replace')","text":"

    Transform vector data into raster data.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| geodataframe | GeoDataFrame | The vector dataframe to be rasterized. | required |
| resolution | Optional[float] | The resolution, i.e. cell size, of the output raster. Optional if base_raster_profile is given. | None |
| value_column | Optional[str] | The column name with values for each geometry. If None, then default_value is used for all geometries. | None |
| default_value | float | Default value burned into raster cells based on geometries. | 1.0 |
| base_raster_profile | Optional[Union[Profile, dict]] | Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute the grid. | None |
| fill_value | float | Value used outside the burned/rasterized geometry cells. | 0.0 |
| buffer_value | Optional[float] | For adding a buffer around passed geometries before rasterization. | None |
| merge_strategy | Literal['replace', 'add'] | How to handle overlapping geometries. "add" causes overlapping geometries to add together the values while "replace" does not. Adding them together is the basis for density computations, where the density can be calculated by using a default value of 1.0 and the sum in each cell is the count of intersecting geometries. | 'replace' |

Returns:

| Type | Description |
| ---- | ----------- |
| Tuple[ndarray, dict] | Rasterized vector data and metadata. |

    Source code in eis_toolkit/vector_processing/rasterize_vector.py
    @beartype\ndef rasterize_vector(\n    geodataframe: gpd.GeoDataFrame,\n    resolution: Optional[float] = None,\n    value_column: Optional[str] = None,\n    default_value: float = 1.0,\n    fill_value: float = 0.0,\n    base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n    buffer_value: Optional[float] = None,\n    merge_strategy: Literal[\"replace\", \"add\"] = \"replace\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Transform vector data into raster data.\n\n    Args:\n        geodataframe: The vector dataframe to be rasterized.\n        resolution: The resolution i.e. cell size of the output raster.\n            Optional if base_raster_profile is given.\n        value_column: The column name with values for each geometry.\n            If None, then default_value is used for all geometries.\n        default_value: Default value burned into raster cells based on geometries.\n        base_raster_profile: Base raster profile\n            to be used for determining the grid on which vectors are\n            burned in. If None, the geometries and provided resolution\n            value are used to compute grid.\n        fill_value: Value used outside the burned/rasterized geometry cells.\n        buffer_value: For adding a buffer around passed\n            geometries before rasterization.\n        merge_strategy: How to handle overlapping geometries.\n            \"add\" causes overlapping geometries to add together the\n            values while \"replace\" does not. Adding them together is the\n            basis for density computations where the density can be\n            calculated by using a default value of 1.0 and the sum in\n            each cell is the count of intersecting geometries.\n\n    Returns:\n        Rasterized vector data and metadata.\n    \"\"\"\n\n    if geodataframe.shape[0] == 0:\n        # Empty GeoDataFrame\n        raise exceptions.EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n    if resolution is None and base_raster_profile is None:\n        raise exceptions.InvalidParameterValueException(\n            \"Expected either resolution or base_raster_profile to be given.\"\n        )\n    if resolution is not None and resolution <= 0:\n        raise exceptions.NumericValueSignException(\n            f\"Expected a positive value resolution ({dict(resolution=resolution)})\"\n        )\n    if value_column is not None and value_column not in geodataframe.columns:\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected value_column ({value_column}) to be contained in geodataframe columns.\"\n        )\n    if buffer_value is not None and buffer_value < 0:\n        raise exceptions.NumericValueSignException(\n            f\"Expected a positive buffer_value ({dict(buffer_value=buffer_value)})\"\n        )\n\n    if base_raster_profile is not None and not isinstance(base_raster_profile, (profiles.Profile, dict)):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected base_raster_profile ({type(base_raster_profile)}) to be dict or rasterio.profiles.Profile.\"\n        )\n\n    if buffer_value is not None:\n        geodataframe = geodataframe.copy()\n        geodataframe[\"geometry\"] = geodataframe[\"geometry\"].apply(lambda geom: geom.buffer(buffer_value))\n\n    return _rasterize_vector(\n        geodataframe=geodataframe,\n        value_column=value_column,\n        default_value=default_value,\n        fill_value=fill_value,\n        
base_raster_profile=base_raster_profile,\n        resolution=resolution,\n        merge_alg=getattr(MergeAlg, merge_strategy),\n    )\n
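A minimal usage sketch; the file name and parameter values are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector

faults = gpd.read_file("faults.gpkg")  # hypothetical line data

raster_array, out_meta = rasterize_vector(
    geodataframe=faults,
    resolution=50.0,
    buffer_value=100.0,    # buffer each geometry by 100 map units before burning
    merge_strategy="add",  # overlapping (buffered) geometries sum up
)
```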
    "},{"location":"vector_processing/reproject_vector/","title":"Reproject vector","text":""},{"location":"vector_processing/reproject_vector/#eis_toolkit.vector_processing.reproject_vector.reproject_vector","title":"reproject_vector(geodataframe, target_crs)","text":"

Reprojects vector data to match a given coordinate reference system (EPSG).

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| geodataframe | GeoDataFrame | The vector dataframe to be reprojected. | required |
| target_crs | int | Target CRS as an EPSG code. | required |

Returns:

| Type | Description |
| ---- | ----------- |
| GeoDataFrame | Reprojected vector data. |

Raises:

| Type | Description |
| ---- | ----------- |
| MatchingCrsException | The vector data is already in the target CRS. |

    Source code in eis_toolkit/vector_processing/reproject_vector.py
    @beartype\ndef reproject_vector(geodataframe: geopandas.GeoDataFrame, target_crs: int) -> geopandas.GeoDataFrame:\n    \"\"\"Reprojects vector data to match given coordinate reference system (EPSG).\n\n    Args:\n        geodataframe: The vector dataframe to be reprojected.\n        target_crs: Target CRS as an EPSG code.\n\n    Returns:\n        Reprojected vector data.\n    \"\"\"\n\n    if geodataframe.crs.to_epsg() == target_crs:\n        raise MatchingCrsException(\"Vector data is already in the target CRS.\")\n\n    reprojected_gdf = geodataframe.to_crs(\"epsg:\" + str(target_crs))\n    return reprojected_gdf\n
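A minimal usage sketch; the file name and EPSG code are hypothetical:

```python
import geopandas

from eis_toolkit.vector_processing.reproject_vector import reproject_vector

gdf = geopandas.read_file("data.gpkg")  # hypothetical input
# Raises MatchingCrsException if the data is already in EPSG:3067
reprojected = reproject_vector(gdf, target_crs=3067)
```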
    "},{"location":"vector_processing/vector_density/","title":"Vector density","text":""},{"location":"vector_processing/vector_density/#eis_toolkit.vector_processing.vector_density.vector_density","title":"vector_density(geodataframe, resolution=None, base_raster_profile=None, buffer_value=None, statistic='density')","text":"

    Compute density of geometries within raster.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| geodataframe | GeoDataFrame | The dataframe with vectors of which density is computed. | required |
| resolution | Optional[float] | The resolution, i.e. cell size, of the output raster. Optional if base_raster_profile is given. | None |
| base_raster_profile | Optional[Union[Profile, dict]] | Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute the grid. | None |
| buffer_value | Optional[float] | For adding a buffer around passed geometries before computing density. | None |
| statistic | Literal['density', 'count'] | Whether to return raw per-cell geometry counts ('count') or the counts normalized by the maximum cell count ('density'). | 'density' |

Returns:

| Type | Description |
| ---- | ----------- |
| Tuple[ndarray, dict] | Computed density of vector data and metadata. |

    Source code in eis_toolkit/vector_processing/vector_density.py
    @beartype\ndef vector_density(\n    geodataframe: gpd.GeoDataFrame,\n    resolution: Optional[float] = None,\n    base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n    buffer_value: Optional[float] = None,\n    statistic: Literal[\"density\", \"count\"] = \"density\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Compute density of geometries within raster.\n\n    Args:\n        geodataframe: The dataframe with vectors\n            of which density is computed.\n        resolution: The resolution i.e. cell size of the output raster.\n            Optional if base_raster_profile is given.\n        base_raster_profile: Base raster profile\n            to be used for determining the grid on which vectors are\n            burned in. If None, the geometries and provided resolution\n            value are used to compute grid.\n        buffer_value: For adding a buffer around passed\n            geometries before computing density.\n\n    Returns:\n        Computed density of vector data and metadata.\n    \"\"\"\n    out_raster_array, out_metadata = rasterize_vector(\n        geodataframe=geodataframe,\n        resolution=resolution,\n        base_raster_profile=base_raster_profile,\n        buffer_value=buffer_value,\n        value_column=None,\n        default_value=1.0,\n        fill_value=0.0,\n        merge_strategy=\"add\",\n    )\n    max_count = np.max(out_raster_array)\n    if statistic == \"count\" or np.isclose(max_count, 0.0):\n        return out_raster_array, out_metadata\n    else:\n        return (out_raster_array / max_count), out_metadata\n
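A minimal usage sketch; the file name and resolution are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.vector_processing.vector_density import vector_density

deposits = gpd.read_file("deposits.gpkg")  # hypothetical input

density, out_meta = vector_density(
    geodataframe=deposits,
    resolution=1000.0,
    statistic="density",  # use "count" for raw per-cell counts
)
```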
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"General","text":"

    This is the documentation site of the eis_toolkit python package. Here you can find documentation for each module. The documentation is automatically generated from docstrings.

Development of eis_toolkit is related to the EIS Horizon EU project.

    "},{"location":"dependency_licenses/","title":"Dependency licenses","text":"Name Version License protobuf 3.19.4 3-Clause BSD License tensorboard-plugin-wit 1.8.1 Apache 2.0 absl-py 1.2.0 Apache Software License flatbuffers 1.12 Apache Software License ghp-import 2.1.0 Apache Software License google-auth 2.11.0 Apache Software License google-auth-oauthlib 0.4.6 Apache Software License google-pasta 0.2.0 Apache Software License grpcio 1.48.1 Apache Software License importlib-metadata 4.12.0 Apache Software License keras 2.9.0 Apache Software License libclang 14.0.6 Apache Software License requests 2.28.1 Apache Software License rsa 4.9 Apache Software License tenacity 8.2.2 Apache Software License tensorboard 2.9.1 Apache Software License tensorboard-data-server 0.6.1 Apache Software License tensorflow 2.9.2 Apache Software License tensorflow-estimator 2.9.0 Apache Software License tensorflow-io-gcs-filesystem 0.26.0 Apache Software License watchdog 2.1.9 Apache Software License packaging 21.3 Apache Software License; BSD License python-dateutil 2.8.2 Apache Software License; BSD License affine 2.3.1 BSD cligj 0.7.2 BSD geopandas 0.11.1 BSD Fiona 1.8.21 BSD License Jinja2 3.1.2 BSD License Markdown 3.3.7 BSD License MarkupSafe 2.1.1 BSD License Pygments 2.13.0 BSD License Shapely 1.8.4 BSD License Werkzeug 2.2.2 BSD License astunparse 1.6.3 BSD License click 8.1.3 BSD License click-plugins 1.1.1 BSD License cycler 0.11.0 BSD License gast 0.4.0 BSD License h5py 3.7.0 BSD License idna 3.3 BSD License joblib 1.1.0 BSD License kiwisolver 1.4.4 BSD License mkdocs 1.3.1 BSD License numpy 1.23.2 BSD License oauthlib 3.2.0 BSD License pandas 1.4.4 BSD License patsy 0.5.2 BSD License pyasn1 0.4.8 BSD License pyasn1-modules 0.2.8 BSD License rasterio 1.3.2 BSD License requests-oauthlib 1.3.1 BSD License scikit-learn 1.1.2 BSD License scipy 1.9.1 BSD License statsmodels 0.13.2 BSD License threadpoolctl 3.1.0 BSD License wrapt 1.14.1 BSD License eis-toolkit 0.1.0 European Union Public Licence 1.2 (EUPL 1.2) Pillow 9.2.0 Historical Permission Notice and Disclaimer (HPND) opt-einsum 3.3.0 MIT snuggs 1.4.7 MIT GDAL 3.4.3 MIT License Keras-Preprocessing 1.1.2 MIT License PyYAML 6.0 MIT License attrs 22.1.0 MIT License cachetools 5.2.0 MIT License charset-normalizer 2.1.1 MIT License fonttools 4.37.1 MIT License mergedeep 1.3.4 MIT License mkdocs-material 8.4.2 MIT License mkdocs-material-extensions 1.0.3 MIT License munch 2.5.0 MIT License plotly 5.14.0 MIT License pymdown-extensions 9.5 MIT License pyparsing 3.0.9 MIT License pyproj 3.3.1 MIT License pytz 2022.2.1 MIT License pyyaml_env_tag 0.1 MIT License setuptools-scm 6.4.2 MIT License six 1.16.0 MIT License termcolor 1.1.0 MIT License tomli 2.0.1 MIT License urllib3 1.26.12 MIT License zipp 3.8.1 MIT License certifi 2022.6.15 Mozilla Public License 2.0 (MPL 2.0) matplotlib 3.5.3 Python Software Foundation License typing_extensions 4.3.0 Python Software Foundation License"},{"location":"conversions/csv_to_geodataframe/","title":"Convert csv to geodataframe","text":""},{"location":"conversions/csv_to_geodataframe/#eis_toolkit.conversions.csv_to_geodataframe.csv_to_geodataframe","title":"csv_to_geodataframe(csv, indexes, target_crs)","text":"

    Read CSV file to a GeoDataFrame.

Usage of a single index expects a valid WKT geometry column. Usage of two indexes expects the POINT features' X-coordinate at the first index and the Y-coordinate at the second index.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| csv | Path | Path to the .csv file to be read. | required |
| indexes | Sequence[int] | Index(es) of the geometry column(s). | required |
| target_crs | int | Target CRS as an EPSG code. | required |

Returns:

| Type | Description |
| ---- | ----------- |
| GeoDataFrame | CSV file read to a GeoDataFrame. |

    Source code in eis_toolkit/conversions/csv_to_geodataframe.py
    @beartype\ndef csv_to_geodataframe(\n    csv: Path,\n    indexes: Sequence[int],\n    target_crs: int,\n) -> geopandas.GeoDataFrame:\n    \"\"\"\n    Read CSV file to a GeoDataFrame.\n\n    Usage of single index expects valid WKT geometry.\n    Usage of two indexes expects POINT feature(s) X-coordinate as the first index and Y-coordinate as the second index.\n\n    Args:\n        csv: Path to the .csv file to be read.\n        indexes: Index(es) of the geometry column(s).\n        target_crs: Target CRS as an EPSG code.\n\n    Returns:\n        CSV file read to a GeoDataFrame.\n    \"\"\"\n\n    data_frame = _csv_to_geodataframe(\n        csv=csv,\n        indexes=indexes,\n        target_crs=target_crs,\n    )\n    return data_frame\n
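A minimal usage sketch, assuming a CSV whose X and Y coordinate columns sit at the (hypothetical) positions 1 and 2:

```python
from pathlib import Path

from eis_toolkit.conversions.csv_to_geodataframe import csv_to_geodataframe

gdf = csv_to_geodataframe(
    csv=Path("points.csv"),  # hypothetical input
    indexes=[1, 2],          # X column index first, Y column index second
    target_crs=4326,
)
```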
    "},{"location":"conversions/raster_to_dataframe/","title":"Convert raster to dataframe","text":""},{"location":"conversions/raster_to_dataframe/#eis_toolkit.conversions.raster_to_dataframe.raster_to_dataframe","title":"raster_to_dataframe(raster, bands=None, add_coordinates=False)","text":"

    Convert raster to Pandas DataFrame.

If bands are not given, all bands are used for conversion. Selected bands are named based on their index, e.g. band_1, band_2, ..., band_n. If wanted, image coordinates (row, col) for each pixel can be written to the dataframe by setting add_coordinates to True.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| raster | DatasetReader | Raster to be converted. | required |
| bands | Optional[Sequence[int]] | Selected bands from multiband raster. Indexing begins from one. Defaults to None. | None |
| add_coordinates | bool | Determines if pixel coordinates are written into the dataframe. Defaults to False. | False |

Returns:

| Type | Description |
| ---- | ----------- |
| DataFrame | Raster converted to a DataFrame. |

    Source code in eis_toolkit/conversions/raster_to_dataframe.py
    @beartype\ndef raster_to_dataframe(\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    add_coordinates: bool = False,\n) -> pd.DataFrame:\n    \"\"\"Convert raster to Pandas DataFrame.\n\n    If bands are not given, all bands are used for conversion. Selected bands are named based on their index e.g.,\n    band_1, band_2,...,band_n. If wanted, image coordinates (row, col) for each pixel can be written to\n    dataframe by setting add_coordinates to True.\n\n    Args:\n        raster: Raster to be converted.\n        bands: Selected bands from multiband raster. Indexing begins from one. Defaults to None.\n        add_coordinates: Determines if pixel coordinates are written into dataframe. Defaults to False.\n\n    Returns:\n        Raster converted to a DataFrame.\n    \"\"\"\n\n    data_frame = _raster_to_dataframe(\n        raster=raster,\n        bands=bands,\n        add_coordinates=add_coordinates,\n    )\n    return data_frame\n
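A minimal usage sketch; the raster file is hypothetical:

```python
import rasterio

from eis_toolkit.conversions.raster_to_dataframe import raster_to_dataframe

with rasterio.open("multiband.tif") as raster:  # hypothetical input
    df = raster_to_dataframe(raster, bands=[1, 2], add_coordinates=True)

# df has columns band_1, band_2, row, col
```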
    "},{"location":"exploratory_analyses/dbscan/","title":"DBSCAN","text":""},{"location":"exploratory_analyses/dbscan/#eis_toolkit.exploratory_analyses.dbscan.dbscan","title":"dbscan(data, max_distance=0.5, min_samples=5)","text":"

    Perform DBSCAN clustering on the input data.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | GeoDataFrame | GeoDataFrame containing the input data. | required |
| max_distance | float | The maximum distance between two samples for one to be considered as in the neighborhood of the other. Defaults to 0.5. | 0.5 |
| min_samples | int | The number of samples in a neighborhood for a point to be considered as a core point. Defaults to 5. | 5 |

Returns:

| Type | Description |
| ---- | ----------- |
| GeoDataFrame | GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a point is a core point (1) or not (0). |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | The input GeoDataFrame is empty. |
| InvalidParameterValueException | The maximum distance between two samples in a neighborhood is not greater than zero or the number of samples in a neighborhood is not greater than one. |

    Source code in eis_toolkit/exploratory_analyses/dbscan.py
    @beartype\ndef dbscan(data: gdp.GeoDataFrame, max_distance: float = 0.5, min_samples: int = 5) -> gdp.GeoDataFrame:\n    \"\"\"\n    Perform DBSCAN clustering on the input data.\n\n    Args:\n        data: GeoDataFrame containing the input data.\n        max_distance: The maximum distance between two samples for one to be considered as in the neighborhood of\n            the other. Defaults to 0.5.\n        min_samples: The number of samples in a neighborhood for a point to be considered as a core point.\n            Defaults to 5.\n\n    Returns:\n        GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a\n            point is a core point (1) or not (0).\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterException: The maximum distance between two samples in a neighborhood is not greater\n            than zero or the number of samples in a neighborhood is not greater than one.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if max_distance <= 0:\n        raise InvalidParameterValueException(\n            \"The input value for the maximum distance between two samples in a neighborhood must be greater than zero.\"\n        )\n\n    if min_samples <= 1:\n        raise InvalidParameterValueException(\n            \"The input value for the minimum number of samples in a neighborhood must be greater than one.\"\n        )\n\n    dbscan_gdf = _dbscan(data, max_distance, min_samples)\n\n    return dbscan_gdf\n
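A minimal usage sketch; the file name and parameter values are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.exploratory_analyses.dbscan import dbscan

points = gpd.read_file("samples.gpkg")  # hypothetical point data
clustered = dbscan(points, max_distance=1000.0, min_samples=10)
```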
    "},{"location":"exploratory_analyses/descriptive_statistics/","title":"Descriptive statistics","text":""},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_dataframe","title":"descriptive_statistics_dataframe(input_data, column)","text":"

    Generate descriptive statistics from vector data.

Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| input_data | Union[DataFrame, GeoDataFrame] | Data to generate descriptive statistics from. | required |
| column | str | Specify the column to generate descriptive statistics from. | required |

Returns:

| Type | Description |
| ---- | ----------- |
| dict | The descriptive statistics in the previously described order. |

Raises:

| Type | Description |
| ---- | ----------- |
| InvalidColumnException | The selected column is not found in the input data. |

    Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
    @beartype\ndef descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:\n    \"\"\"Generate descriptive statistics from vector data.\n\n    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n\n    Args:\n        input_data: Data to generate descriptive statistics from.\n        column: Specify the column to generate descriptive statistics from.\n\n    Returns:\n        The descriptive statistics in previously described order.\n    \"\"\"\n    if column not in input_data.columns:\n        raise InvalidColumnException\n    data = input_data[column]\n    statistics = _descriptive_statistics(data)\n    return statistics\n
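A minimal usage sketch; the file name and column are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_dataframe

gdf = gpd.read_file("samples.gpkg")  # hypothetical input
stats = descriptive_statistics_dataframe(gdf, column="Au_ppm")  # hypothetical column
# stats is a dict with min, max, mean, quantiles, standard deviation,
# relative standard deviation and skewness
```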
    "},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_raster","title":"descriptive_statistics_raster(input_data)","text":"

    Generate descriptive statistics from raster data.

Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness. Nodata values are removed from the data before the statistics are computed.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| input_data | DatasetReader | Data to generate descriptive statistics from. | required |

Returns:

| Type | Description |
| ---- | ----------- |
| dict | The descriptive statistics in the previously described order. |

    Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
    @beartype\ndef descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:\n    \"\"\"Generate descriptive statistics from raster data.\n\n    Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n    Nodata values are removed from the data before the statistics are computed.\n\n    Args:\n        input_data: Data to generate descriptive statistics from.\n\n    Returns:\n        The descriptive statistics in previously described order.\n    \"\"\"\n    data = input_data.read().flatten()\n    nodata_value = input_data.nodata\n    data = data[data != nodata_value]\n    statistics = _descriptive_statistics(data)\n    return statistics\n
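A minimal usage sketch; the raster file is hypothetical:

```python
import rasterio

from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_raster

with rasterio.open("dem.tif") as raster:  # hypothetical input
    stats = descriptive_statistics_raster(raster)
```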
    "},{"location":"exploratory_analyses/feature_importance/","title":"Feature importance","text":""},{"location":"exploratory_analyses/feature_importance/#eis_toolkit.exploratory_analyses.feature_importance.evaluate_feature_importance","title":"evaluate_feature_importance(classifier, x_test, y_test, feature_names, number_of_repetition=50, random_state=0)","text":"

    Evaluate the feature importance of a sklearn classifier or linear model.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| classifier | BaseEstimator | Trained classifier. | required |
| x_test | ndarray | Testing feature data (X data need to be normalized / standardized). | required |
| y_test | ndarray | Testing target data. | required |
| feature_names | Sequence[str] | Names of the feature columns. | required |
| number_of_repetition | int | Number of iterations used when calculating feature importance. Defaults to 50. | 50 |
| random_state | int | Random state for repeatability of results. Defaults to 0. | 0 |

Returns:

| Type | Description |
| ---- | ----------- |
| Tuple[DataFrame, dict] | A dataframe of feature names and importance values, and the result object with importance mean, importance std and overall importances. |

Raises:

| Type | Description |
| ---- | ----------- |
| InvalidDatasetException | When the dataset is None. |

    Source code in eis_toolkit/exploratory_analyses/feature_importance.py
    @beartype\ndef evaluate_feature_importance(\n    classifier: sklearn.base.BaseEstimator,\n    x_test: np.ndarray,\n    y_test: np.ndarray,\n    feature_names: Sequence[str],\n    number_of_repetition: int = 50,\n    random_state: int = 0,\n) -> tuple[pd.DataFrame, dict]:\n    \"\"\"\n    Evaluate the feature importance of a sklearn classifier or linear model.\n\n    Parameters:\n        classifier: Trained classifier.\n        x_test: Testing feature data (X data need to be normalized / standardized).\n        y_test: Testing target data.\n        feature_names: Names of the feature columns.\n        number_of_repetition: Number of iteration used when calculate feature importance (default 50).\n        random_state: random state for repeatability of results (Default 0).\n    Return:\n        A dataframe composed by features name and Importance value\n        The resulted object with importance mean, importance std, and overall importance\n    Raises:\n        InvalidDatasetException: When the dataset is None.\n    \"\"\"\n\n    if x_test is None or y_test is None:\n        raise InvalidDatasetException\n\n    result = permutation_importance(\n        classifier, x_test, y_test.ravel(), n_repeats=number_of_repetition, random_state=random_state\n    )\n\n    feature_importance = pd.DataFrame({\"Feature\": feature_names, \"Importance\": result.importances_mean})\n\n    feature_importance[\"Importance\"] = feature_importance[\"Importance\"] * 100\n    feature_importance = feature_importance.sort_values(by=\"Importance\", ascending=False)\n\n    return feature_importance, result\n
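A minimal usage sketch with synthetic data and a scikit-learn classifier; all names and values are illustrative:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from eis_toolkit.exploratory_analyses.feature_importance import evaluate_feature_importance

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))  # already scaled feature data
y = (X[:, 0] > 0).astype(int)  # synthetic target
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

classifier = RandomForestClassifier(random_state=0).fit(x_train, y_train)
importances, result = evaluate_feature_importance(
    classifier, x_test, y_test, feature_names=["a", "b", "c"]
)
```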
    "},{"location":"exploratory_analyses/k_means_cluster/","title":"K-means clustering","text":""},{"location":"exploratory_analyses/k_means_cluster/#eis_toolkit.exploratory_analyses.k_means_cluster.k_means_clustering","title":"k_means_clustering(data, number_of_clusters=None, random_state=None)","text":"

    Perform k-means clustering on the input data.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | GeoDataFrame | A GeoDataFrame containing the input data. | required |
| number_of_clusters | Optional[int] | The number of clusters (>= 1) to form. Optional parameter. If not provided, the optimal number of clusters is computed using the elbow method. | None |
| random_state | Optional[int] | Seed for the random number generation used in centroid initialization, to make the randomness deterministic. Optional parameter. | None |

Returns:

| Type | Description |
| ---- | ----------- |
| GeoDataFrame | GeoDataFrame containing assigned cluster labels. |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | The input GeoDataFrame is empty. |
| InvalidParameterValueException | The number of clusters is less than one. |

    Source code in eis_toolkit/exploratory_analyses/k_means_cluster.py
    @beartype\ndef k_means_clustering(\n    data: gdp.GeoDataFrame, number_of_clusters: Optional[int] = None, random_state: Optional[int] = None\n) -> gdp.GeoDataFrame:\n    \"\"\"\n    Perform k-means clustering on the input data.\n\n    Args:\n        data: A GeoDataFrame containing the input data.\n        number_of_clusters: The number of clusters (>= 1) to form. Optional parameter. If not provided,\n            optimal number of clusters is computed using the elbow method.\n        random_state: A random number generation for centroid initialization to make\n            the randomness deterministic. Optional parameter.\n\n    Returns:\n        GeoDataFrame containing assigned cluster labels.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterException: The number of clusters is less than one.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if number_of_clusters is not None and number_of_clusters < 1:\n        raise InvalidParameterValueException(\"The input value for number of clusters must be at least one.\")\n\n    k_means_gdf = _k_means_clustering(data, number_of_clusters, random_state)\n\n    return k_means_gdf\n
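A minimal usage sketch; the file name and number of clusters are hypothetical:

```python
import geopandas as gpd

from eis_toolkit.exploratory_analyses.k_means_cluster import k_means_clustering

points = gpd.read_file("samples.gpkg")  # hypothetical point data
clustered = k_means_clustering(points, number_of_clusters=5, random_state=0)
```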
    "},{"location":"exploratory_analyses/parallel_coordinates/","title":"Plot parallel coordinates","text":""},{"location":"exploratory_analyses/parallel_coordinates/#eis_toolkit.exploratory_analyses.parallel_coordinates.plot_parallel_coordinates","title":"plot_parallel_coordinates(df, color_column_name, plot_title=None, palette_name=None, curved_lines=True)","text":"

    Plot a parallel coordinates plot.

    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only the first 8 to plot.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| df | DataFrame | The DataFrame to plot. | required |
| color_column_name | str | The name of the column in df to use for color encoding. | required |
| plot_title | Optional[str] | The title for the plot. Default is None. | None |
| palette_name | Optional[str] | The name of the color palette to use. Default is None. | None |
| curved_lines | bool | If True, the plot will have curved instead of straight lines. Default is True. | True |

Returns:

| Type | Description |
| ---- | ----------- |
| Figure | A matplotlib figure containing the parallel coordinates plot. |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | Raised when the DataFrame is empty. |
| InvalidColumnException | Raised when the color column is not found in the DataFrame. |
| InconsistentDataTypesException | Raised when the color column has multiple data types. |

    Source code in eis_toolkit/exploratory_analyses/parallel_coordinates.py
    @beartype\ndef plot_parallel_coordinates(\n    df: pd.DataFrame,\n    color_column_name: str,\n    plot_title: Optional[str] = None,\n    palette_name: Optional[str] = None,\n    curved_lines: bool = True,\n) -> matplotlib.figure.Figure:\n    \"\"\"Plot a parallel coordinates plot.\n\n    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric\n    to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only\n    the first 8 to plot.\n\n    Args:\n        df: The DataFrame to plot.\n        color_column_name: The name of the column in df to use for color encoding.\n        plot_title: The title for the plot. Default is None.\n        palette_name: The name of the color palette to use. Default is None.\n        curved_lines: If True, the plot will have curved instead of straight lines. Default is True.\n\n    Returns:\n        A matplotlib figure containing the parallel coordinates plot.\n\n    Raises:\n        EmptyDataFrameException: Raised when the DataFrame is empty.\n        InvalidColumnException: Raised when the color column is not found in the DataFrame.\n        InconsistentDataTypesException: Raised when the color column has multiple data types.\n    \"\"\"\n\n    if df.empty:\n        raise exceptions.EmptyDataFrameException(\"The input DataFrame is empty.\")\n\n    if color_column_name not in df.columns:\n        raise exceptions.InvalidColumnException(\n            f\"The provided color column {color_column_name} is not found in the DataFrame.\"\n        )\n\n    df = df.convert_dtypes()\n    df = df.apply(pd.to_numeric, errors=\"ignore\")\n\n    color_data = df[color_column_name].to_numpy()\n    if len(set([type(elem) for elem in color_data])) != 1:\n        raise exceptions.InconsistentDataTypesException(\n            \"The color column should have a consistent datatype. Multiple data types detected in the color column.\"\n        )\n\n    df = df.select_dtypes(include=np.number)\n\n    # Drop non-numeric columns and the column used for coloring\n    columns_to_drop = [color_column_name]\n    for column in df.columns.values:\n        if df[column].isnull().all():\n            columns_to_drop.append(column)\n    df = df.loc[:, ~df.columns.isin(columns_to_drop)]\n\n    # Keep only first 8 columns if more are still present\n    if len(df.columns.values) > 8:\n        df = df.iloc[:, :8]\n\n    data_labels = df.columns.values\n    data = df.to_numpy()\n\n    fig = _plot_parallel_coordinates(\n        data=data,\n        data_labels=data_labels,\n        color_data=color_data,\n        color_column_name=color_column_name,\n        plot_title=plot_title,\n        palette_name=palette_name,\n        curved_lines=curved_lines,\n    )\n    return fig\n
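A minimal usage sketch with a small illustrative DataFrame:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.parallel_coordinates import plot_parallel_coordinates

df = pd.DataFrame(  # illustrative data
    {
        "Au": [1.2, 0.4, 2.2, 0.9],
        "Cu": [110.0, 30.0, 240.0, 80.0],
        "class": ["a", "b", "a", "b"],
    }
)
fig = plot_parallel_coordinates(df, color_column_name="class")
fig.savefig("parallel_coordinates.png")
```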
    "},{"location":"exploratory_analyses/pca/","title":"PCA","text":""},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.compute_pca","title":"compute_pca(data, number_of_components, scaler_type='standard', nodata=None, color_column_name=None)","text":"

    Compute given number of principal components for numeric input data.

    Various input data formats are accepted and the output format depends on the input format. If input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can be specified in this case.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | Union[ndarray, DataFrame, GeoDataFrame, DatasetReader] | Input data for PCA. | required |
| number_of_components | int | The number of principal components to compute. Should be >= 1 and at most the number of numeric columns if input is a (Geo)DataFrame, or the number of bands if input is a raster. | required |
| scaler_type | Literal['standard', 'min_max', 'robust'] | Transform data according to a specified Sklearn scaler. Options are "standard", "min_max" and "robust". Defaults to "standard". | 'standard' |
| nodata | Optional[Number] | Define nodata value to be masked out. Optional parameter. If None and input is a raster, looks for the nodata value in the raster metadata. Defaults to None. | None |
| color_column_name | Optional[str] | If input data is a DataFrame or a GeoDataFrame, column name used for coloring data points in the produced pairplot can be defined. Defaults to None. | None |

Returns:

| Type | Description |
| ---- | ----------- |
| Union[ndarray, Tuple[DataFrame, PairGrid], Tuple[GeoDataFrame, PairGrid], Tuple[ndarray, Profile]] | The computed principal components in the same format as the input data (for raster input, a Numpy array containing the data and the raster profile). |
| ndarray | The explained variance ratios for each component. |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataException | The input is empty. |
| InvalidParameterValueException | The number of principal components is less than 1, or more than the number of columns if the input was a (Geo)DataFrame. |

    Source code in eis_toolkit/exploratory_analyses/pca.py
    @beartype\ndef compute_pca(\n    data: Union[np.ndarray, pd.DataFrame, gpd.GeoDataFrame, rasterio.io.DatasetReader],\n    number_of_components: int,\n    scaler_type: Literal[\"standard\", \"min_max\", \"robust\"] = \"standard\",\n    nodata: Optional[Number] = None,\n    color_column_name: Optional[str] = None,\n) -> Tuple[\n    Union[\n        np.ndarray,\n        Tuple[pd.DataFrame, sns.PairGrid],\n        Tuple[gpd.GeoDataFrame, sns.PairGrid],\n        Tuple[np.ndarray, rasterio.profiles.Profile],\n    ],\n    np.ndarray,\n]:\n    \"\"\"\n    Compute given number of principal components for numeric input data.\n\n    Various input data formats are accepted and the output format depends on the input format. If\n    input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can\n    be specified in this case.\n\n    Args:\n        data: Input data for PCA.\n        number_of_components: The number of principal components to compute Should be >= 1 and at most\n            the number of numeric columns if input is (Geo)DataFrame or number of bands if input is raster.\n        scaler_type: Transform data according to a specified Sklearn scaler.\n            Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".\n        nodata: Define nodata value to be masked out. Optional parameter. If None and input is raster, looks\n            for nodata value from raster metadata. Defaults to None.\n        color_column_name: If input data is a DataFrame or a GeoDataFrame, column name used for\n            coloring data points in the produced pairplot can be defined. Defaults to None.\n\n    Returns:\n        The computed principal components in corresponding format as the input data (for raster, output is\n        Numpy array containing the data and raster profile) and the explained variance ratios for each component.\n\n    Raises:\n        EmptyDataException: The input is empty.\n        InvalidNumberOfPrincipalComponents: The number of principal components is less than 1 or more than\n            number of columns if input was (Geo)DataFrame.\n    \"\"\"\n    if scaler_type not in SCALERS:\n        raise exceptions.InvalidParameterValueException(f\"Invalid scaler. Choose from: {list(SCALERS.keys())}\")\n\n    if number_of_components < 1:\n        raise exceptions.InvalidParameterValueException(\"The number of principal components should be >= 1.\")\n\n    # Get feature matrix (Numpy array) from various input types\n    if isinstance(data, np.ndarray):\n        feature_matrix = data\n        if feature_matrix.ndim == 2:  # Table-like data (assumme it is a DataFrame transformed to Numpy array)\n            feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=False)\n        elif feature_matrix.ndim == 3:  # Assume data represents multiband raster data\n            rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n            feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n        else:\n            raise exceptions.InvalidParameterValueException(\n                f\"Unsupported input data format. 
{feature_matrix.ndim} dimensions detected.\"\n            )\n        if feature_matrix.size == 0:\n            raise exceptions.EmptyDataException(\"Input array is empty.\")\n\n    elif isinstance(data, rasterio.io.DatasetReader):\n        feature_matrix = data.read()\n        if feature_matrix.ndim < 3:\n            raise exceptions.InvalidParameterValueException(\"Input raster should have multiple bands.\")\n        rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n        if nodata is None:\n            nodata = data.nodata\n        feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n\n    elif isinstance(data, pd.DataFrame):\n        df = data.copy()\n        if df.empty:\n            raise exceptions.EmptyDataException(\"Input DataFrame is empty.\")\n        if number_of_components > len(df.columns):\n            raise exceptions.InvalidParameterValueException(\n                \"The number of principal should be at most the number of numeric columns in the input DataFrame.\"\n            )\n        if color_column_name is not None:\n            color_column_data = df[color_column_name]\n\n        if isinstance(data, gpd.GeoDataFrame):\n            geometries = data.geometry\n            crs = data.crs\n            df = df.drop(columns=[\"geometry\"])\n\n        df = df.convert_dtypes()\n        df = df.apply(pd.to_numeric, errors=\"ignore\")\n        df = df.select_dtypes(include=np.number)\n        df = df.astype(dtype=np.number)\n        feature_matrix = df.to_numpy()\n        feature_matrix = feature_matrix.astype(float)\n        feature_matrix, nan_mask = _handle_missing_values(feature_matrix, nodata)\n\n    # Core PCA computation\n    principal_components, explained_variances = _compute_pca(feature_matrix, number_of_components, scaler_type)\n\n    # Put nodata back in and consider new dimension of data\n    if nodata is not None:\n        principal_components[nan_mask[:, number_of_components]] = nodata\n    else:\n        principal_components[nan_mask[:, :number_of_components]] = np.nan\n\n    # Convert PCA output to proper format\n    if isinstance(data, np.ndarray):\n        if data.ndim == 3:\n            result_data = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n        else:\n            result_data = principal_components\n\n    elif isinstance(data, rasterio.io.DatasetReader):\n        principal_components = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n        out_profile = data.profile.copy()\n        out_profile[\"count\"] = number_of_components\n        out_profile[\"dtype\"] = \"float32\"\n        result_data = (principal_components, out_profile)\n\n    elif isinstance(data, pd.DataFrame):\n        component_names = [f\"principal_component_{i+1}\" for i in range(number_of_components)]\n        pca_df = pd.DataFrame(data=principal_components, columns=component_names)\n        if color_column_name is not None:\n            pca_df[color_column_name] = color_column_data\n        sns_pair_grid = plot_pca(pca_df, explained_variances, color_column_name)\n        if isinstance(data, gpd.GeoDataFrame):\n            pca_df = gpd.GeoDataFrame(pca_df, geometry=geometries, crs=crs)\n        result_data = (pca_df, sns_pair_grid)\n\n    return result_data, explained_variances\n
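A minimal usage sketch for raster input; the file is hypothetical and assumed to have at least three bands:

```python
import rasterio

from eis_toolkit.exploratory_analyses.pca import compute_pca

with rasterio.open("multiband.tif") as raster:  # hypothetical input
    (components, out_profile), explained_variances = compute_pca(
        raster, number_of_components=3
    )
```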
    "},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.plot_pca","title":"plot_pca(pca_df, explained_variances=None, color_column_name=None, save_path=None)","text":"

    Plot a scatter matrix of different principal component combinations.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| pca_df | DataFrame | A DataFrame containing computed principal components. | required |
| explained_variances | Optional[ndarray] | The explained variance ratios for each principal component. Used for labeling axes in the plot. Optional parameter. Defaults to None. | None |
| color_column_name | Optional[str] | Name of the column that will be used for color-coding data points. Typically a categorical variable in the original data. Optional parameter, no colors if not provided. Defaults to None. | None |
| save_path | Optional[str] | The save path for the plot. Optional parameter, no saving if not provided. Defaults to None. | None |

Returns:

| Type | Description |
| ---- | ----------- |
| PairGrid | A Seaborn pairgrid containing the PCA scatter matrix. |

Raises:

| Type | Description |
| ---- | ----------- |
| InvalidColumnException | DataFrame does not contain the given color column. |

    Source code in eis_toolkit/exploratory_analyses/pca.py
    @beartype\ndef plot_pca(\n    pca_df: pd.DataFrame,\n    explained_variances: Optional[np.ndarray] = None,\n    color_column_name: Optional[str] = None,\n    save_path: Optional[str] = None,\n) -> sns.PairGrid:\n    \"\"\"Plot a scatter matrix of different principal component combinations.\n\n    Args:\n        pca_df: A DataFrame containing computed principal components.\n        explained_variances: The explained variance ratios for each principal component. Used for labeling\n            axes in the plot. Optional parameter. Defaults to None.\n        color_column_name: Name of the column that will be used for color-coding data points. Typically a\n            categorical variable in the original data. Optional parameter, no colors if not provided.\n            Defaults to None.\n        save_path: The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.\n\n    Returns:\n        A Seaborn pairgrid containing the PCA scatter matrix.\n\n    Raises:\n        InvalidColumnException: DataFrame does not contain the given color column.\n    \"\"\"\n\n    if color_column_name and color_column_name not in pca_df.columns:\n        raise exceptions.InvalidColumnException(\"DataFrame does not contain the given color column.\")\n\n    pair_grid = sns.pairplot(pca_df, hue=color_column_name)\n\n    # Add explained variances to axis labels if provided\n    if explained_variances is not None:\n        labels = [f\"PC {i+1} ({var:.1f}%)\" for i, var in enumerate(explained_variances * 100)]\n    else:\n        labels = [f\"PC {i+1}\" for i in range(len(pair_grid.axes))]\n\n    # Iterate over axes objects and set the labels\n    for i, ax_row in enumerate(pair_grid.axes):\n        for j, ax in enumerate(ax_row):\n            if j == 0:  # Only the first column\n                ax.set_ylabel(labels[i], fontsize=\"large\")\n            if i == len(ax_row) - 1:  # Only the last row\n                ax.set_xlabel(labels[j], fontsize=\"large\")\n\n    if save_path is not None:\n        plt.savefig(save_path)\n\n    return pair_grid\n
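A minimal usage sketch with illustrative principal-component data:

```python
import numpy as np
import pandas as pd

from eis_toolkit.exploratory_analyses.pca import plot_pca

rng = np.random.default_rng(0)
pca_df = pd.DataFrame(  # illustrative components
    rng.normal(size=(50, 2)),
    columns=["principal_component_1", "principal_component_2"],
)
grid = plot_pca(pca_df, explained_variances=np.array([0.6, 0.3]), save_path="pca.png")
```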
    "},{"location":"exploratory_analyses/statistical_testing/","title":"Statistical (hypothesis) testing","text":""},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.chi_square_test","title":"chi_square_test(data, target_column, columns=None)","text":"

    Compute Chi-square test for independence on the input data.

    It is assumed that the variables in the input data are independent and that they are categorical, i.e. strings, booleans or integers, but not floats.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| data | DataFrame | Dataframe containing the input data. | required |
| target_column | str | Variable against which independence of other variables is tested. | required |
| columns | Optional[Sequence[str]] | Variables that are tested against the variable in target_column. If None, every column is used. | None |

Raises:

| Type | Description |
| ---- | ----------- |
| EmptyDataFrameException | The input Dataframe is empty. |
| InvalidParameterValueException | The target_column is not in the input Dataframe or an invalid column is provided. |

Returns:

| Type | Description |
| ---- | ----------- |
| dict | Test statistics for each variable (except target_column). |

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef chi_square_test(data: pd.DataFrame, target_column: str, columns: Optional[Sequence[str]] = None) -> dict:\n    \"\"\"Compute Chi-square test for independence on the input data.\n\n    It is assumed that the variables in the input data are independent and that they are categorical, i.e. strings,\n    booleans or integers, but not floats.\n\n    Args:\n        data: Dataframe containing the input data\n        target_column: Variable against which independence of other variables is tested.\n        columns: Variables that are tested against the variable in target_column. If None, every column is used.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: The target_column is not in input Dataframe or invalid column is provided.\n\n    Returns:\n        Test statistics for each variable (except target_column).\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if not check_columns_valid(data, [target_column]):\n        raise exceptions.InvalidParameterValueException(\"Target column not found in the Dataframe.\")\n\n    if columns is not None:\n        invalid_columns = [column for column in columns if column not in data.columns]\n        if any(invalid_columns):\n            raise exceptions.InvalidParameterValueException(\n                f\"The following variables are not in the dataframe: {invalid_columns}\"\n            )\n    else:\n        columns = data.columns\n\n    statistics = {}\n    for column in columns:\n        if column != target_column:\n            contingency_table = pd.crosstab(data[target_column], data[column])\n            chi_square, p_value, degrees_of_freedom, _ = chi2_contingency(contingency_table)\n            statistics[column] = (chi_square, p_value, degrees_of_freedom)\n\n    return statistics\n
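A minimal usage sketch with a small illustrative DataFrame of categorical variables:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import chi_square_test

df = pd.DataFrame(  # illustrative data
    {
        "deposit": [1, 0, 1, 0, 1, 0],
        "lithology": ["A", "B", "A", "A", "B", "B"],
    }
)
statistics = chi_square_test(df, target_column="deposit")
# {'lithology': (chi_square, p_value, degrees_of_freedom)}
```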
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.correlation_matrix","title":"correlation_matrix(data, correlation_method='pearson', min_periods=None)","text":"

    Compute correlation matrix on the input data.

    It is assumed that the data is numeric, i.e. integers or floats.

Parameters:

- data (DataFrame): Dataframe containing the input data. Required.
- correlation_method (Literal['pearson', 'kendall', 'spearman']): 'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.
- min_periods (Optional[int]): Minimum number of observations required per pair of columns to have valid result. Optional. Defaults to None.

Raises:

- EmptyDataFrameException: The input Dataframe is empty.
- InvalidParameterValueException: min_periods argument is used with method 'kendall'.

Returns:

- DataFrame: Dataframe containing the correlation matrix.

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef correlation_matrix(\n    data: pd.DataFrame,\n    correlation_method: Literal[\"pearson\", \"kendall\", \"spearman\"] = \"pearson\",\n    min_periods: Optional[int] = None,\n) -> pd.DataFrame:\n    \"\"\"Compute correlation matrix on the input data.\n\n    It is assumed that the data is numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n        correlation_method: 'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.\n        min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: min_periods argument is used with method 'kendall'.\n\n    Returns:\n        Dataframe containing the correlation matrix\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if correlation_method == \"kendall\" and min_periods is not None:\n        raise exceptions.InvalidParameterValueException(\n            \"The argument min_periods is available only with correlation methods 'pearson' and 'spearman'.\"\n        )\n\n    matrix = data.corr(method=correlation_method, min_periods=min_periods, numeric_only=True)\n\n    return matrix\n
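A short usage sketch (column names are illustrative):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import correlation_matrix

df = pd.DataFrame({"cu": [1.0, 2.0, 3.0, 4.0], "zn": [2.1, 3.9, 6.2, 8.0]})
# Rank-based correlation; omit correlation_method for the default Pearson
corr = correlation_matrix(df, correlation_method="spearman")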
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.covariance_matrix","title":"covariance_matrix(data, min_periods=None, delta_degrees_of_freedom=1)","text":"

    Compute covariance matrix on the input data.

    It is assumed that the data is numeric, i.e. integers or floats.

Parameters:

- data (DataFrame): Dataframe containing the input data. Required.
- min_periods (Optional[int]): Minimum number of observations required per pair of columns to have valid result. Optional. Defaults to None.
- delta_degrees_of_freedom (int): Delta degrees of freedom used for computing covariance matrix. Defaults to 1.

Raises:

- EmptyDataFrameException: The input Dataframe is empty.
- InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.

Returns:

- DataFrame: Dataframe containing the covariance matrix.

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef covariance_matrix(\n    data: pd.DataFrame, min_periods: Optional[int] = None, delta_degrees_of_freedom: int = 1\n) -> pd.DataFrame:\n    \"\"\"Compute covariance matrix on the input data.\n\n    It is assumed that the data is numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n        min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n        delta_degrees_of_freedom: Delta degrees of freedom used for computing covariance matrix. Defaults to 1.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n        InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.\n\n    Returns:\n        Dataframe containing the covariance matrix\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    if delta_degrees_of_freedom < 0:\n        raise exceptions.InvalidParameterValueException(\"Delta degrees of freedom must be non-negative.\")\n\n    matrix = data.cov(min_periods=min_periods, ddof=delta_degrees_of_freedom)\n\n    return matrix\n
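A short usage sketch (same illustrative data as above):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import covariance_matrix

df = pd.DataFrame({"cu": [1.0, 2.0, 3.0, 4.0], "zn": [2.1, 3.9, 6.2, 8.0]})
# ddof=1 gives the sample covariance (the default)
cov = covariance_matrix(df, delta_degrees_of_freedom=1)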
    "},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.normality_test","title":"normality_test(data)","text":"

    Compute Shapiro-Wilk test for normality on the input data.

The null hypothesis of the test is that the data is normally distributed. It is assumed that the input data is numeric, i.e. integers or floats.

Parameters:

- data (DataFrame): Dataframe containing the input data. Required.

Returns:

- dict: Test statistics for each variable.

Raises:

- EmptyDataFrameException: The input Dataframe is empty.

    Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
    @beartype\ndef normality_test(data: pd.DataFrame) -> dict:\n    \"\"\"Compute Shapiro-Wilk test for normality on the input data.\n\n    It is assumed that the input data is normally distributed and numeric, i.e. integers or floats.\n\n    Args:\n        data: Dataframe containing the input data.\n\n    Returns:\n        Test statistics for each variable.\n\n    Raises:\n        EmptyDataFrameException: The input Dataframe is empty.\n    \"\"\"\n    if check_empty_dataframe(data):\n        raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n    statistics = {}\n    for column in data.columns:\n        statistic, p_value = shapiro(data[column])\n        statistics[column] = (statistic, p_value)\n\n    return statistics\n
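A short usage sketch (values are illustrative):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import normality_test

df = pd.DataFrame({"cu": [1.2, 0.9, 1.1, 1.4, 1.0, 0.8]})
stats = normality_test(df)
statistic, p_value = stats["cu"]
# A small p-value suggests departure from normality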
    "},{"location":"prediction/fuzzy_overlay/","title":"Fuzzy overlay","text":""},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.and_overlay","title":"and_overlay(data)","text":"

    Compute an 'and' overlay operation with fuzzy logic.

Parameters:

- data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:

- ndarray: 2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].

Raises:

- InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef and_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute an 'and' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return data.min(axis=0)\n
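A small sketch of the membership-stack convention shared by the overlay functions (the values are illustrative):

import numpy as np
from eis_toolkit.prediction.fuzzy_overlay import and_overlay, or_overlay

# Two fuzzy membership "bands" stacked along axis 0 -> shape (2, rows, cols)
memberships = np.stack([
    np.array([[0.2, 0.8], [0.5, 1.0]]),
    np.array([[0.6, 0.3], [0.5, 0.0]]),
])
print(and_overlay(memberships))  # pixel-wise minimum
print(or_overlay(memberships))   # pixel-wise maximum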
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.gamma_overlay","title":"gamma_overlay(data, gamma)","text":"

    Compute a 'gamma' overlay operation with fuzzy logic.

Parameters:

- data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.
- gamma (float): The gamma parameter. With gamma value 0, the result is the same as the 'product' overlay. When gamma is closer to 1, the weight of the 'sum' overlay is increased. Value must be in the range [0, 1]. Required.

Returns:

- ndarray: 2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].

Raises:

- InvalidParameterValueException: If data values or gamma are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef gamma_overlay(data: np.ndarray, gamma: float) -> np.ndarray:\n    \"\"\"Compute a 'gamma' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n        gamma: The gamma parameter. With gamma value 0, result will be same as 'product' overlay.\n            When gamma is closer to 1, the weight of 'sum' overlay is increased.\n            Value must be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values or gamma are not in range [0, 1].\n    \"\"\"\n    if gamma < 0 or gamma > 1:\n        raise exceptions.InvalidParameterValueException(\"The gamma parameter must be in range [0, 1]\")\n\n    sum = sum_overlay(data=data)\n    product = product_overlay(data=data)\n    return product ** (1 - gamma) * sum**gamma\n
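A standalone sketch of blending the 'product' and 'sum' overlays with gamma (values are illustrative):

import numpy as np
from eis_toolkit.prediction.fuzzy_overlay import gamma_overlay

memberships = np.stack([
    np.array([[0.2, 0.8], [0.5, 0.9]]),
    np.array([[0.6, 0.3], [0.5, 0.1]]),
])
# gamma=0 reproduces the 'product' overlay, gamma=1 the 'sum' overlay
combined = gamma_overlay(memberships, gamma=0.7)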
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.or_overlay","title":"or_overlay(data)","text":"

    Compute an 'or' overlay operation with fuzzy logic.

Parameters:

- data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:

- ndarray: 2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].

Raises:

- InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef or_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute an 'or' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return data.max(axis=0)\n
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.product_overlay","title":"product_overlay(data)","text":"

    Compute a 'product' overlay operation with fuzzy logic.

Parameters:

- data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:

- ndarray: 2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].

Raises:

- InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
    @beartype\ndef product_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute a 'product' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    return np.prod(data, axis=0)\n
    "},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.sum_overlay","title":"sum_overlay(data)","text":"

    Compute a 'sum' overlay operation with fuzzy logic.

Parameters:

- data (ndarray): The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1]. Required.

Returns:

- ndarray: 2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].

Raises:

- InvalidParameterValueException: If data values are not in range [0, 1].

    Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef sum_overlay(data: np.ndarray) -> np.ndarray:\n    \"\"\"Compute a 'sum' overlay operation with fuzzy logic.\n\n    Args:\n        data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n            Data points should be in the range [0, 1].\n\n    Returns:\n        2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].\n\n    Raises:\n        InvalidParameterValueException: If data values are not in range [0, 1].\n    \"\"\"\n    _check_input_data(data=data)\n\n    # Fuzzy algebraic sum, 1 - prod(1 - x), which stays within [0, 1] for any number of bands\n    return 1 - np.prod(1 - data, axis=0)\n
    "},{"location":"prediction/weights_of_evidence/","title":"Weights of evidence","text":""},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_responses","title":"weights_of_evidence_calculate_responses(output_arrays, nr_of_deposits, nr_of_pixels)","text":"

    Calculate the posterior probabilities for the given generalized weight arrays.

Parameters:

- output_arrays (Sequence[Dict[str, ndarray]]): List of output array dictionaries returned by weights of evidence calculations. For each dictionary, generalized weight and generalized standard deviation arrays are used and summed together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found, the W+ and S_W+ arrays are used (i.e. when outputs from unique weight calculations are used with this function). Required.
- nr_of_deposits (int): Number of deposit pixels in the input data for weights of evidence calculations. Required.
- nr_of_pixels (int): Number of evidence pixels in the input data for weights of evidence calculations. Required.

Returns:

- ndarray: Array of posterior probabilities.
- ndarray: Array of standard deviations in the posterior probability calculations.
- ndarray: Array of confidence of the prospectivity values obtained in the posterior probability array.

    Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_responses(\n    output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int\n) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:\n    \"\"\"Calculate the posterior probabilities for the given generalized weight arrays.\n\n    Args:\n        output_arrays: List of output array dictionaries returned by weights of evidence calculations.\n            For each dictionary, generalized weight and generalized standard deviation arrays are used and summed\n            together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,\n            the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function).\n        nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.\n        nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.\n\n    Returns:\n        Array of posterior probabilities.\n        Array of standard deviations in the posterior probability calculations.\n        Array of confidence of the prospectivity values obtained in the posterior probability array.\n    \"\"\"\n    gen_weights_sum = sum(\n        [\n            item[GENERALIZED_WEIGHT_PLUS_COLUMN]\n            if GENERALIZED_WEIGHT_PLUS_COLUMN in item.keys()\n            else item[WEIGHT_PLUS_COLUMN]\n            for item in output_arrays\n        ]\n    )\n    gen_weights_variance_sum = sum(\n        [\n            np.square(item[GENERALIZED_S_WEIGHT_PLUS_COLUMN])\n            if GENERALIZED_S_WEIGHT_PLUS_COLUMN in item.keys()\n            else np.square(item[WEIGHT_S_PLUS_COLUMN])\n            for item in output_arrays\n        ]\n    )\n\n    prior_probabilities = nr_of_deposits / nr_of_pixels\n    prior_odds = np.log(prior_probabilities / (1 - prior_probabilities))\n    posterior_probabilities = np.exp(gen_weights_sum + prior_odds) / (1 + np.exp(gen_weights_sum + prior_odds))\n\n    posterior_probabilities_squared = np.square(posterior_probabilities)\n    posterior_probabilities_std = np.sqrt(\n        (1 / nr_of_deposits + gen_weights_variance_sum) * posterior_probabilities_squared\n    )\n\n    confidence_array = posterior_probabilities / posterior_probabilities_std\n    return posterior_probabilities, posterior_probabilities_std, confidence_array\n
    "},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_weights","title":"weights_of_evidence_calculate_weights(evidential_raster, deposits, raster_nodata=None, weights_type='unique', studentized_contrast_threshold=1, arrays_to_generate=None)","text":"

    Calculate weights of spatial associations.

Parameters:

- evidential_raster (DatasetReader): The evidential raster. Required.
- deposits (GeoDataFrame): Vector data representing the mineral deposits or occurrences point data. Required.
- raster_nodata (Optional[Number]): Nodata value of the raster, if it needs to be specified manually. Optional parameter, defaults to None (nodata from raster metadata is used).
- weights_type (Literal['unique', 'categorical', 'ascending', 'descending']): Accepted values are 'unique', 'categorical', 'ascending' and 'descending'. Unique weights do not create generalized classes and do not use a studentized contrast threshold value, while categorical, cumulative ascending and cumulative descending weights do. Categorical weights are calculated so that all classes with studentized contrast below the defined threshold are grouped into one generalized class. Cumulative ascending and descending weights find the class with maximum contrast and group the classes above/below it into generalized classes. Generalized weights are also calculated for the generalized classes. Defaults to 'unique'.
- studentized_contrast_threshold (Number): Studentized contrast threshold value used with the 'categorical', 'ascending' and 'descending' weight types. Used either as the reclassification threshold directly (categorical) or to check that the class with maximum contrast has a studentized contrast value of at least the defined value (cumulative). Defaults to 1.
- arrays_to_generate (Optional[Sequence[str]]): Arrays to generate from the computed weight metrics. All column names in the produced weights_df are valid choices. Defaults to ["Class", "W+", "S_W+"] for the 'unique' weights_type and ["Class", "W+", "S_W+", "Generalized W+", "Generalized S_W+"] for the cumulative weight types.

Returns:

- DataFrame: Dataframe with weights of spatial association between the input data.
- dict: Dictionary of arrays for specified metrics.
- dict: Raster metadata.
- int: Number of deposit pixels.
- int: Number of all evidence pixels.

    Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_weights(\n    evidential_raster: rasterio.io.DatasetReader,\n    deposits: gpd.GeoDataFrame,\n    raster_nodata: Optional[Number] = None,\n    weights_type: Literal[\"unique\", \"categorical\", \"ascending\", \"descending\"] = \"unique\",\n    studentized_contrast_threshold: Number = 1,\n    arrays_to_generate: Optional[Sequence[str]] = None,\n) -> Tuple[pd.DataFrame, dict, dict, int, int]:\n    \"\"\"\n    Calculate weights of spatial associations.\n\n    Args:\n        evidential_raster: The evidential raster.\n        deposits: Vector data representing the mineral deposits or occurrences point data.\n        raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None\n            (nodata from raster metadata is used).\n        weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.\n            Unique weights does not create generalized classes and does not use a studentized contrast threshold value\n            while categorical, cumulative ascending and cumulative descending do. Categorical weights are calculated so\n            that all classes with studentized contrast below the defined threshold are grouped into one generalized\n            class. Cumulative ascending and descending weights find the class with max contrast and group classes\n            above/below into generalized classes. Generalized weights are also calculated for generalized classes.\n        studentized_contrast_threshold: Studentized contrast threshold value used with 'categorical', 'ascending' and\n            'descending' weight types. Used either as reclassification threshold directly (categorical) or to check\n            that class with max contrast has studentized contrast value at least the defined value (cumulative).\n            Defaults to 1.\n        arrays_to_generate: Arrays to generate from the computed weight metrics. All column names\n            in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"]\n            for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"]\n            for the cumulative weight types.\n\n    Returns:\n        Dataframe with weights of spatial association between the input data.\n        Dictionary of arrays for specified metrics.\n        Raster metadata.\n        Number of deposit pixels.\n        Number of all evidence pixels.\n    \"\"\"\n\n    if arrays_to_generate is None:\n        if weights_type == \"unique\":\n            metrics_to_arrays = DEFAULT_METRICS_UNIQUE\n        else:\n            metrics_to_arrays = DEFAULT_METRICS_CUMULATIVE\n    else:\n        for col_name in arrays_to_generate:\n            if col_name not in VALID_DF_COLUMNS:\n                raise exceptions.InvalidColumnException(\n                    f\"Arrays to generate contains invalid metric / column name: {col_name}.\"\n                )\n        metrics_to_arrays = arrays_to_generate.copy()\n\n    # 1. Preprocess data\n    evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)\n    raster_meta = evidential_raster.meta\n\n    # Rasterize deposits\n    deposit_array, _ = rasterize_vector(\n        geodataframe=deposits, default_value=1.0, base_raster_profile=raster_meta, fill_value=0.0\n    )\n\n    # Mask NaN out of the array\n    nodata_mask = np.isnan(evidence_array)\n    masked_evidence_array = evidence_array[~nodata_mask]\n    masked_deposit_array = deposit_array[~nodata_mask]\n\n    # 2. WofE calculations\n    if weights_type == \"unique\" or weights_type == \"categorical\":\n        wofe_weights = _unique_weights(masked_deposit_array, masked_evidence_array)\n    elif weights_type == \"ascending\":\n        wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=True)\n    elif weights_type == \"descending\":\n        wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=False)\n    else:\n        raise exceptions.InvalidParameterValueException(\n            \"Expected weights_type to be one of unique, categorical, ascending or descending.\"\n        )\n\n    # 3. Create DataFrame based on calculated metrics\n    df_entries = []\n    for cls, metrics in wofe_weights.items():\n        metrics = [round(metric, 4) if isinstance(metric, np.floating) else metric for metric in metrics]\n        A, _, C, _, w_plus, s_w_plus, w_minus, s_w_minus, contrast, s_contrast, studentized_contrast = metrics\n        df_entries.append(\n            {\n                CLASS_COLUMN: cls,\n                PIXEL_COUNT_COLUMN: A + C,\n                DEPOSIT_COUNT_COLUMN: A,\n                WEIGHT_PLUS_COLUMN: w_plus,\n                WEIGHT_S_PLUS_COLUMN: s_w_plus,\n                WEIGHT_MINUS_COLUMN: w_minus,\n                WEIGHT_S_MINUS_COLUMN: s_w_minus,\n                CONTRAST_COLUMN: contrast,\n                S_CONTRAST_COLUMN: s_contrast,\n                STUDENTIZED_CONTRAST_COLUMN: studentized_contrast,\n            }\n        )\n    weights_df = pd.DataFrame(df_entries)\n\n    # 4. If we use cumulative weights type, calculate generalized classes and weights\n    if weights_type == \"categorical\":\n        weights_df = _generalized_classes_categorical(weights_df, studentized_contrast_threshold)\n        weights_df = _generalized_weights_categorical(weights_df, masked_deposit_array)\n    elif weights_type == \"ascending\" or weights_type == \"descending\":\n        weights_df = _generalized_classes_cumulative(weights_df, studentized_contrast_threshold)\n        weights_df = _generalized_weights_cumulative(weights_df, masked_deposit_array)\n\n    # 5. Generate arrays for desired metrics\n    arrays_dict = _generate_arrays_from_metrics(evidence_array, weights_df, metrics_to_arrays)\n\n    # Return nr. of deposit pixels and nr. of all evidence pixels to be used in calculate responses\n    nr_of_deposits = int(np.sum(masked_deposit_array == 1))\n    nr_of_pixels = int(np.size(masked_evidence_array))\n\n    return weights_df, arrays_dict, raster_meta, nr_of_deposits, nr_of_pixels\n
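A sketch of the typical two-step workflow, chaining the weight calculation into the response calculation (the file names are hypothetical):

import geopandas as gpd
import rasterio
from eis_toolkit.prediction.weights_of_evidence import (
    weights_of_evidence_calculate_weights,
    weights_of_evidence_calculate_responses,
)

deposits = gpd.read_file("deposits.gpkg")  # hypothetical deposit point data
with rasterio.open("evidence.tif") as evidential_raster:  # hypothetical evidence raster
    weights_df, arrays, meta, nr_of_deposits, nr_of_pixels = weights_of_evidence_calculate_weights(
        evidential_raster, deposits, weights_type="ascending", studentized_contrast_threshold=1
    )
# With several evidence layers, collect one arrays dict per layer into the list
posterior, posterior_std, confidence = weights_of_evidence_calculate_responses(
    output_arrays=[arrays], nr_of_deposits=nr_of_deposits, nr_of_pixels=nr_of_pixels
)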
    "},{"location":"raster_processing/check_raster_grids/","title":"Check raster grids","text":""},{"location":"raster_processing/check_raster_grids/#eis_toolkit.raster_processing.check_raster_grids.check_raster_grids","title":"check_raster_grids(rasters, same_extent=False)","text":"

    Check the set of input rasters for matching gridding and optionally matching bounds.

Parameters:

- rasters (List[DatasetReader]): List of rasters to test for matching gridding. Required.
- same_extent (bool): Optional boolean argument that determines if rasters are tested for matching bounds. Defaults to False.

Returns:

- bool: True if gridding and optionally bounds matches, False if not.

    Source code in eis_toolkit/raster_processing/check_raster_grids.py
    def check_raster_grids(  # type: ignore[no-any-unimported]\n    rasters: List[rasterio.io.DatasetReader], same_extent: bool = False\n) -> bool:\n    \"\"\"\n    Check the set of input rasters for matching gridding and optionally matching bounds.\n\n    Args:\n        rasters: List of rasters to test for matching gridding.\n        same_extent: optional boolean argument that determines if rasters are tested for matching bounds.\n            Default set to False.\n\n    Returns:\n        True if gridding and optionally bounds matches, False if not.\n    \"\"\"\n    check = _check_raster_grids(rasters=rasters, same_extent=same_extent)\n    return check\n
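A short usage sketch (the file names are hypothetical):

import rasterio
from eis_toolkit.raster_processing.check_raster_grids import check_raster_grids

with rasterio.open("dem.tif") as dem, rasterio.open("magnetics.tif") as mag:
    # Also require matching bounds, not just matching cell size and alignment
    grids_match = check_raster_grids([dem, mag], same_extent=True)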
    "},{"location":"raster_processing/clipping/","title":"Clipping","text":""},{"location":"raster_processing/clipping/#eis_toolkit.raster_processing.clipping.clip_raster","title":"clip_raster(raster, geodataframe)","text":"

    Clips a raster with polygon geometries.

Parameters:

- raster (DatasetReader): The raster to be clipped. Required.
- geodataframe (GeoDataFrame): A geodataframe containing the geometries to do the clipping with. Should contain only polygon features. Required.

Returns:

- ndarray: The clipped raster data.
- dict: The updated metadata.

Raises:

- NonMatchingCrsException: The raster and geodataframe are not in the same CRS.
- NotApplicableGeometryTypeException: The input geometries contain non-polygon features.

    Source code in eis_toolkit/raster_processing/clipping.py
    @beartype\ndef clip_raster(raster: rasterio.io.DatasetReader, geodataframe: geopandas.GeoDataFrame) -> Tuple[np.ndarray, dict]:\n    \"\"\"Clips a raster with polygon geometries.\n\n    Args:\n        raster: The raster to be clipped.\n        geodataframe: A geodataframe containing the geometries to do the clipping with.\n            Should contain only polygon features.\n\n    Returns:\n        The clipped raster data.\n        The updated metadata.\n\n    Raises:\n        NonMatchingCrsException: The raster and geodataframe are not in the same CRS.\n        NotApplicableGeometryTypeException: The input geometries contain non-polygon features.\n    \"\"\"\n    geometries = geodataframe[\"geometry\"]\n\n    if not check_matching_crs(\n        objects=[raster, geometries],\n    ):\n        raise NonMatchingCrsException(\"The raster and geodataframe are not in the same CRS.\")\n\n    if not check_geometry_types(\n        geometries=geometries,\n        allowed_types=[\"Polygon\", \"MultiPolygon\"],\n    ):\n        raise NotApplicableGeometryTypeException(\"The input geometries contain non-polygon features.\")\n\n    out_image, out_meta = _clip_raster(\n        raster=raster,\n        geometries=geometries,\n    )\n\n    return out_image, out_meta\n
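A short usage sketch (the file names are hypothetical):

import geopandas as gpd
import rasterio
from eis_toolkit.raster_processing.clipping import clip_raster

area = gpd.read_file("study_area.gpkg")  # hypothetical polygon layer
with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    out_image, out_meta = clip_raster(raster, area)
# Write the clipped result using the updated metadata
with rasterio.open("evidence_clipped.tif", "w", **out_meta) as dst:
    dst.write(out_image)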
    "},{"location":"raster_processing/create_constant_raster/","title":"Create constant raster","text":""},{"location":"raster_processing/create_constant_raster/#eis_toolkit.raster_processing.create_constant_raster.create_constant_raster","title":"create_constant_raster(constant_value, template_raster=None, coord_west=None, coord_north=None, coord_east=None, coord_south=None, target_epsg=None, target_pixel_size=None, raster_width=None, raster_height=None, nodata_value=None)","text":"

    Create a constant raster based on a user-defined value.

Provides 3 methods for raster creation:

1. Set extent and coordinate system based on a template raster.
2. Set extent from origin, based on the western and northern coordinates and the pixel size.
3. Set extent from bounds, based on western, northern, eastern and southern points.

    Always provide values for height and width for the last two options, which correspond to the desired number of pixels for rows and columns.

Parameters:

- constant_value (Number): The constant value to use in the raster. Required.
- template_raster (Optional[DatasetReader]): An optional raster to use as a template for the output. Defaults to None.
- coord_west (Optional[Number]): The western coordinate of the output raster in [m]. Defaults to None.
- coord_east (Optional[Number]): The eastern coordinate of the output raster in [m]. Defaults to None.
- coord_south (Optional[Number]): The southern coordinate of the output raster in [m]. Defaults to None.
- coord_north (Optional[Number]): The northern coordinate of the output raster in [m]. Defaults to None.
- target_epsg (Optional[int]): The EPSG code for the output raster. Defaults to None.
- target_pixel_size (Optional[int]): The pixel size of the output raster. Defaults to None.
- raster_width (Optional[int]): The width of the output raster. Defaults to None.
- raster_height (Optional[int]): The height of the output raster. Defaults to None.
- nodata_value (Optional[Number]): The nodata value of the output raster. Defaults to None.

Returns:

- Tuple[ndarray, dict]: A tuple containing the output raster as a NumPy array and updated metadata.

Raises:

- InvalidParameterValueException: An invalid input parameter was provided.

    Source code in eis_toolkit/raster_processing/create_constant_raster.py
@beartype\ndef create_constant_raster(  # type: ignore[no-any-unimported]\n    constant_value: Number,\n    template_raster: Optional[rasterio.io.DatasetReader] = None,\n    coord_west: Optional[Number] = None,\n    coord_north: Optional[Number] = None,\n    coord_east: Optional[Number] = None,\n    coord_south: Optional[Number] = None,\n    target_epsg: Optional[int] = None,\n    target_pixel_size: Optional[int] = None,\n    raster_width: Optional[int] = None,\n    raster_height: Optional[int] = None,\n    nodata_value: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Create a constant raster based on a user-defined value.\n\n    Provides 3 methods for raster creation:\n    1. Set extent and coordinate system based on a template raster.\n    2. Set extent from origin, based on the western and northern coordinates and the pixel size.\n    3. Set extent from bounds, based on western, northern, eastern and southern points.\n\n    Always provide values for height and width for the last two options, which correspond to\n    the desired number of pixels for rows and columns.\n\n    Args:\n        constant_value: The constant value to use in the raster.\n        template_raster: An optional raster to use as a template for the output.\n        coord_west: The western coordinate of the output raster in [m].\n        coord_east: The eastern coordinate of the output raster in [m].\n        coord_south: The southern coordinate of the output raster in [m].\n        coord_north: The northern coordinate of the output raster in [m].\n        target_epsg: The EPSG code for the output raster.\n        target_pixel_size: The pixel size of the output raster.\n        raster_width: The width of the output raster.\n        raster_height: The height of the output raster.\n        nodata_value: The nodata value of the output raster.\n\n    Returns:\n        A tuple containing the output raster as a NumPy array and updated metadata.\n\n    Raises:\n        InvalidParameterValueException: An invalid input parameter was provided.\n    \"\"\"\n\n    if template_raster is not None:\n        out_array, out_meta = _create_constant_raster_from_template(constant_value, template_raster, nodata_value)\n\n    elif all(coords is not None for coords in [coord_west, coord_east, coord_south, coord_north]):\n        if raster_height <= 0 or raster_width <= 0:\n            raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n        if not check_minmax_position((coord_west, coord_east)) or not check_minmax_position((coord_south, coord_north)):\n            raise InvalidParameterValueException(\"Invalid coordinate values provided.\")\n\n        out_array, out_meta = _create_constant_raster_from_bounds(\n            constant_value,\n            coord_west,\n            coord_north,\n            coord_east,\n            coord_south,\n            target_epsg,\n            raster_width,\n            raster_height,\n            nodata_value,\n        )\n\n    elif all(coords is not None for coords in [coord_west, coord_north]) and all(\n        coords is None for coords in [coord_east, coord_south]\n    ):\n        if raster_height <= 0 or raster_width <= 0:\n            raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n        if target_pixel_size <= 0:\n            raise InvalidParameterValueException(\"Invalid pixel size.\")\n\n        out_array, out_meta = _create_constant_raster_from_origin(\n            constant_value,\n            coord_west,\n            coord_north,\n            target_epsg,\n            target_pixel_size,\n            raster_width,\n            raster_height,\n            nodata_value,\n        )\n\n    else:\n        raise InvalidParameterValueException(\"Suitable parameter values were not provided for any of the 3 methods.\")\n\n    constant_value = cast_scalar_to_int(constant_value)\n    nodata_value = cast_scalar_to_int(out_meta[\"nodata\"])\n\n    if isinstance(constant_value, int) and isinstance(nodata_value, int):\n        target_dtype = np.result_type(get_min_int_type(constant_value), get_min_int_type(nodata_value))\n        out_array = out_array.astype(target_dtype)\n        out_meta[\"dtype\"] = out_array.dtype\n    elif isinstance(constant_value, int) and isinstance(nodata_value, float):\n        out_array = out_array.astype(get_min_int_type(constant_value))\n        out_meta[\"dtype\"] = np.float64.__name__\n    elif isinstance(constant_value, float):\n        out_array = out_array.astype(np.float64)\n        out_meta[\"dtype\"] = out_array.dtype\n\n    return out_array, out_meta\n
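A sketch of method 2, setting the extent from an origin point (the EPSG code and numbers are arbitrary):

from eis_toolkit.raster_processing.create_constant_raster import create_constant_raster

# 100 x 100 pixel raster of ones, 25 m cells, anchored at a west/north origin
out_array, out_meta = create_constant_raster(
    constant_value=1,
    coord_west=384744.0,
    coord_north=6671384.0,
    target_epsg=3067,
    target_pixel_size=25,
    raster_width=100,
    raster_height=100,
    nodata_value=-999,
)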
    "},{"location":"raster_processing/extract_values_from_raster/","title":"Extract values from raster","text":""},{"location":"raster_processing/extract_values_from_raster/#eis_toolkit.raster_processing.extract_values_from_raster.extract_values_from_raster","title":"extract_values_from_raster(raster_list, geodataframe, raster_column_names=None)","text":"

    Extract raster values using point data to a DataFrame.

    If custom column names are not given, column names are file_name for singleband files and file_name_bandnumber for multiband files. If custom column names are given, there should be column names for each raster provided in the raster list.

Parameters:

- raster_list (Sequence[DatasetReader]): List to extract values from. Required.
- geodataframe (GeoDataFrame): Object to extract values with. Required.
- raster_column_names (Optional[Sequence[str]]): List of optional column names for bands. Defaults to None.

Returns:

- DataFrame: Dataframe with x & y coordinates and the values from the raster file(s) as columns.

Raises:

- NonMatchingParameterLengthsException: raster_list and raster_column_names have different lengths.

    Source code in eis_toolkit/raster_processing/extract_values_from_raster.py
    @beartype\ndef extract_values_from_raster(\n    raster_list: Sequence[rasterio.io.DatasetReader],\n    geodataframe: gpd.GeoDataFrame,\n    raster_column_names: Optional[Sequence[str]] = None,\n) -> pd.DataFrame:\n    \"\"\"Extract raster values using point data to a DataFrame.\n\n       If custom column names are not given, column names are file_name for singleband files\n       and file_name_bandnumber for multiband files. If custom column names are given, there\n       should be column names for each raster provided in the raster list.\n\n    Args:\n        raster_list: List to extract values from.\n        geodataframe: Object to extract values with.\n        raster_column_names: List of optional column names for bands.\n\n    Returns:\n        Dataframe with x & y coordinates and the values from the raster file(s) as columns.\n\n    Raises:\n        NonMatchingParameterLengthsException: raster_list and raster_columns_names have different lengths.\n    \"\"\"\n    if raster_column_names == []:\n        raster_column_names = None\n\n    if raster_column_names is not None and len(raster_list) != len(raster_column_names):\n        raise NonMatchingParameterLengthsException(\"Raster list and raster columns names have different lengths.\")\n\n    data_frame = _extract_values_from_raster(\n        raster_list=raster_list, geodataframe=geodataframe, raster_column_names=raster_column_names\n    )\n\n    return data_frame\n
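A short usage sketch (the file and column names are hypothetical):

import geopandas as gpd
import rasterio
from eis_toolkit.raster_processing.extract_values_from_raster import extract_values_from_raster

points = gpd.read_file("samples.gpkg")  # hypothetical point layer
with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    df = extract_values_from_raster([raster], points, raster_column_names=["evidence"])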
    "},{"location":"raster_processing/reprojecting/","title":"Reprojecting","text":""},{"location":"raster_processing/reprojecting/#eis_toolkit.raster_processing.reprojecting.reproject_raster","title":"reproject_raster(raster, target_crs, resampling_method=warp.Resampling.nearest)","text":"

    Reprojects raster to match given coordinate reference system (EPSG).

Parameters:

- raster (DatasetReader): The raster to be reprojected. Required.
- target_crs (int): Target CRS as EPSG code. Required.
- resampling_method (Resampling): Resampling method. The most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to nearest.

Returns:

- ndarray: The reprojected raster data.
- dict: The updated metadata.

Raises:

- MatchingCrsException: Raster is already in the target CRS.

    Source code in eis_toolkit/raster_processing/reprojecting.py
@beartype\ndef reproject_raster(\n    raster: rasterio.io.DatasetReader, target_crs: int, resampling_method: warp.Resampling = warp.Resampling.nearest\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Reprojects raster to match given coordinate reference system (EPSG).\n\n    Args:\n        raster: The raster to be reprojected.\n        target_crs: Target CRS as EPSG code.\n        resampling_method: Resampling method. Most suitable method depends on the dataset and context.\n            Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.\n\n    Returns:\n        The reprojected raster data.\n        The updated metadata.\n\n    Raises:\n        MatchingCrsException: Raster is already in the target CRS.\n    \"\"\"\n    if target_crs == int(raster.crs.to_string()[5:]):\n        raise MatchingCrsException(\"Raster is already in the target CRS.\")\n\n    out_image, out_meta = _reproject_raster(raster, target_crs, resampling_method)\n\n    return out_image, out_meta\n
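A short usage sketch (the file name and EPSG code are hypothetical):

import rasterio
from rasterio import warp
from eis_toolkit.raster_processing.reprojecting import reproject_raster

with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    out_image, out_meta = reproject_raster(raster, target_crs=3067, resampling_method=warp.Resampling.bilinear)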
    "},{"location":"raster_processing/resampling/","title":"Resampling","text":""},{"location":"raster_processing/resampling/#eis_toolkit.raster_processing.resampling.resample","title":"resample(raster, resolution, resampling_method=Resampling.bilinear)","text":"

    Resamples raster according to given resolution.

Parameters:

- raster (DatasetReader): The raster to be resampled. Required.
- resolution (Number): Target resolution, i.e. cell size of the output raster. Required.
- resampling_method (Resampling): Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to bilinear.

Returns:

- ndarray: The resampled raster data.
- dict: The updated metadata.

Raises:

- NumericValueSignException: Resolution is not a positive value.

    Source code in eis_toolkit/raster_processing/resampling.py
@beartype\ndef resample(\n    raster: rasterio.io.DatasetReader,\n    resolution: Number,\n    resampling_method: Resampling = Resampling.bilinear,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Resamples raster according to given resolution.\n\n    Args:\n        raster: The raster to be resampled.\n        resolution: Target resolution i.e. cell size of the output raster.\n        resampling_method: Resampling method. Most suitable\n            method depends on the dataset and context. Nearest, bilinear and cubic are some\n            common choices. This parameter defaults to bilinear.\n\n    Returns:\n        The resampled raster data.\n        The updated metadata.\n\n    Raises:\n        NumericValueSignException: Resolution is not a positive value.\n    \"\"\"\n    if resolution <= 0:\n        raise exceptions.NumericValueSignException(f\"Expected a positive value for resolution: {resolution}\")\n\n    out_image, out_meta = _resample(raster, resolution, resampling_method)\n    return out_image, out_meta\n
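A short usage sketch (the file name and cell size are hypothetical):

import rasterio
from rasterio.enums import Resampling
from eis_toolkit.raster_processing.resampling import resample

with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    # Resample to 50 map-unit cells with bilinear interpolation
    out_image, out_meta = resample(raster, resolution=50, resampling_method=Resampling.bilinear)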
    "},{"location":"raster_processing/snapping/","title":"Snapping","text":""},{"location":"raster_processing/snapping/#eis_toolkit.raster_processing.snapping.snap_with_raster","title":"snap_with_raster(raster, snap_raster)","text":"

    Snaps/aligns raster to given snap raster.

Raster is snapped from its left-bottom corner to the nearest snap raster grid corner in the left-bottom direction. If the rasters are already aligned, simply returns the input raster data and metadata.

Parameters:

- raster (DatasetReader): The raster to be snapped. Required.
- snap_raster (DatasetReader): The snap raster, i.e. the reference grid raster. Required.

Returns:

- ndarray: The snapped raster data.
- dict: The updated metadata.

Raises:

- NonMatchingCrsException: Raster and snap raster are not in the same CRS.
- MatchingRasterGridException: Raster grids are already aligned.

    Source code in eis_toolkit/raster_processing/snapping.py
@beartype\ndef snap_with_raster(raster: rasterio.DatasetReader, snap_raster: rasterio.DatasetReader) -> Tuple[np.ndarray, dict]:\n    \"\"\"Snaps/aligns raster to given snap raster.\n\n    Raster is snapped from its left-bottom corner to nearest snap raster grid corner in left-bottom direction.\n    If rasters are aligned, simply returns input raster data and metadata.\n\n    Args:\n        raster: The raster to be snapped.\n        snap_raster: The snap raster i.e. reference grid raster.\n\n    Returns:\n        The snapped raster data.\n        The updated metadata.\n\n    Raises:\n        NonMatchingCrsException: Raster and snap raster are not in the same CRS.\n        MatchingRasterGridException: Raster grids are already aligned.\n    \"\"\"\n\n    if not check_matching_crs(\n        objects=[raster, snap_raster],\n    ):\n        raise NonMatchingCrsException(\"Raster and snap raster have different CRS.\")\n\n    if snap_raster.bounds.bottom == raster.bounds.bottom and snap_raster.bounds.left == raster.bounds.left:\n        raise MatchingRasterGridException(\"Raster grids are already aligned.\")\n\n    out_image, out_meta = _snap(raster, snap_raster)\n    return out_image, out_meta\n
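A short usage sketch (the file names are hypothetical):

import rasterio
from eis_toolkit.raster_processing.snapping import snap_with_raster

with rasterio.open("evidence.tif") as raster, rasterio.open("base_grid.tif") as snap_raster:
    out_image, out_meta = snap_with_raster(raster, snap_raster)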
    "},{"location":"raster_processing/unifying/","title":"Unifying","text":""},{"location":"raster_processing/unifying/#eis_toolkit.raster_processing.unifying.unify_raster_grids","title":"unify_raster_grids(base_raster, rasters_to_unify, resampling_method=Resampling.nearest, same_extent=False)","text":"

    Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.

Parameters:

- base_raster (DatasetReader): The base raster to determine target raster grid properties. Required.
- rasters_to_unify (Sequence[DatasetReader]): Rasters to be unified with the base raster. Required.
- resampling_method (Resampling): Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. Defaults to nearest.
- same_extent (bool): If the unified rasters will be forced to have the same extent/bounds as the base raster. Expands smaller rasters with nodata cells. Defaults to False.

Returns:

- List[Tuple[ndarray, dict]]: List of unified rasters' data and metadata. First element is the base raster.

Raises:

- InvalidParameterValueException: Rasters to unify is empty.

    Source code in eis_toolkit/raster_processing/unifying.py
    @beartype\ndef unify_raster_grids(\n    base_raster: rasterio.io.DatasetReader,\n    rasters_to_unify: Sequence[rasterio.io.DatasetReader],\n    resampling_method: Resampling = Resampling.nearest,\n    same_extent: bool = False,\n) -> List[Tuple[np.ndarray, dict]]:\n    \"\"\"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.\n\n    Args:\n        base_raster: The base raster to determine target raster grid properties.\n        rasters_to_unify: Rasters to be unified with the base raster.\n        resampling_method: Resampling method. Most suitable\n            method depends on the dataset and context. Nearest, bilinear and cubic are some\n            common choices. This parameter defaults to nearest.\n        same_extent: If the unified rasters will be forced to have the same extent/bounds\n            as the base raster. Expands smaller rasters with nodata cells. Defaults to False.\n\n    Returns:\n        List of unified rasters' data and metadata. First element is the base raster.\n\n    Raises:\n        InvalidParameterValueException: Rasters to unify is empty.\n    \"\"\"\n    if len(rasters_to_unify) == 0:\n        raise InvalidParameterValueException(\"Rasters to unify is empty.\")\n\n    out_rasters = _unify_raster_grids(base_raster, rasters_to_unify, resampling_method, same_extent)\n    return out_rasters\n
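A short usage sketch (the file names are hypothetical):

import rasterio
from rasterio.enums import Resampling
from eis_toolkit.raster_processing.unifying import unify_raster_grids

with rasterio.open("base.tif") as base, rasterio.open("a.tif") as a, rasterio.open("b.tif") as b:
    # First element of the result is the base raster itself
    unified = unify_raster_grids(base, [a, b], resampling_method=Resampling.bilinear, same_extent=True)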
    "},{"location":"raster_processing/unique_combinations/","title":"Unique combinations in rasters","text":""},{"location":"raster_processing/unique_combinations/#eis_toolkit.raster_processing.unique_combinations.unique_combinations","title":"unique_combinations(raster_list)","text":"

    Get combinations of raster values between rasters.

    All bands in all rasters are used for analysis. The first band of the first raster is used for reference when making the output.

Parameters:

- raster_list (Sequence[DatasetReader]): Rasters to be used for finding combinations. Required.

Returns:

- out_image (ndarray): Combinations of rasters.
- out_meta (dict): The metadata of the first raster in raster_list.

Raises:

- InvalidParameterValueException: The input rasters contain only one band in total, or the raster grids do not match.

    Source code in eis_toolkit/raster_processing/unique_combinations.py
    @beartype\ndef unique_combinations(  # type: ignore[no-any-unimported]\n    raster_list: Sequence[rasterio.io.DatasetReader],\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Get combinations of raster values between rasters.\n\n    All bands in all rasters are used for analysis.\n    The first band of the first raster is used for reference when making the output.\n\n    Args:\n        raster_list: Rasters to be used for finding combinations.\n\n    Returns:\n        out_image: Combinations of rasters.\n        out_meta: The metadata of the first raster in raster_list.\n    \"\"\"\n    bands = []\n    out_meta = raster_list[0].meta\n    out_meta[\"count\"] = 1\n\n    for raster in raster_list:\n        for band in range(1, raster.count + 1):\n            bands.append(raster.read(band))\n\n    if len(bands) == 1:\n        raise InvalidParameterValueException(\"Expected to have more bands than 1\")\n\n    if check_raster_grids(raster_list) is not True:\n        raise InvalidParameterValueException(\"Expected raster grids to be of same shape\")\n\n    out_image = _unique_combinations(bands)\n    return out_image, out_meta\n
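A short usage sketch (the file names are hypothetical; the rasters must share the same grid):

import rasterio
from eis_toolkit.raster_processing.unique_combinations import unique_combinations

with rasterio.open("lithology.tif") as litho, rasterio.open("alteration.tif") as alt:
    # Each unique pair of input values gets its own class number in the output
    out_image, out_meta = unique_combinations([litho, alt])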
    "},{"location":"raster_processing/windowing/","title":"Windowing","text":""},{"location":"raster_processing/windowing/#eis_toolkit.raster_processing.windowing.extract_window","title":"extract_window(raster, center_coords, height, width)","text":"

    Extract window from raster.

Center coordinate must be inside the raster, but the window can extend outside the raster, in which case padding with the raster nodata value is used.

Parameters:

- raster (DatasetReader): Source raster. Required.
- center_coords (Tuple[Number, Number]): Center coordinates for the window in the form (x, y). The coordinates should be in the raster's CRS. Required.
- height (int): Window height in pixels. Required.
- width (int): Window width in pixels. Required.

Returns:

- ndarray: The extracted raster window.
- dict: The updated metadata.

Raises:

- InvalidParameterValueException: Window size is too small.
- CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.

    Source code in eis_toolkit/raster_processing/windowing.py
@beartype\ndef extract_window(\n    raster: rasterio.io.DatasetReader,\n    center_coords: Tuple[Number, Number],\n    height: int,\n    width: int,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Extract window from raster.\n\n       Center coordinate must be inside the raster but window can extend outside the raster in which case padding with\n       raster nodata value is used.\n    Args:\n        raster: Source raster.\n        center_coords: Center coordinates for window in form (x, y). The coordinates should be in the raster's CRS.\n        height: Window height in pixels.\n        width: Window width in pixels.\n\n    Returns:\n        The extracted raster window.\n        The updated metadata.\n\n    Raises:\n        InvalidParameterValueException: Window size is too small.\n        CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.\n    \"\"\"\n\n    if height < 1 or width < 1:\n        raise InvalidParameterValueException(f\"Window size is too small: {height}, {width}.\")\n\n    center_x = center_coords[0]\n    center_y = center_coords[1]\n\n    if (\n        center_x < raster.bounds.left\n        or center_x > raster.bounds.right\n        or center_y < raster.bounds.bottom\n        or center_y > raster.bounds.top\n    ):\n        raise CoordinatesOutOfBoundsException(\"Window center coordinates are out of raster bounds.\")\n\n    out_image, out_meta = _extract_window(raster, center_coords, height, width)\n\n    return out_image, out_meta\n
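A short usage sketch (the file name and coordinates are hypothetical):

import rasterio
from eis_toolkit.raster_processing.windowing import extract_window

with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    # 32 x 32 pixel window centered on a point given in the raster's CRS
    out_image, out_meta = extract_window(raster, center_coords=(384800.0, 6671300.0), height=32, width=32)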
    "},{"location":"training_data_tools/class_balancing/","title":"Class balancing","text":""},{"location":"training_data_tools/class_balancing/#eis_toolkit.training_data_tools.class_balancing.balance_SMOTETomek","title":"balance_SMOTETomek(X, y, sampling_strategy='auto', random_state=None)","text":"

    Balances the classes of input dataset using SMOTETomek resampling method.

Parameters:

- X (Union[DataFrame, ndarray]): The feature matrix (input data as a DataFrame). Required.
- y (Union[Series, ndarray]): The target labels corresponding to the feature matrix. Required.
- sampling_strategy (Union[float, str, dict]): Parameter controlling how to perform the resampling. If float, specifies the ratio of samples in minority class to samples of majority class; if str, specifies classes to be resampled ("minority", "not minority", "not majority", "all", "auto"); if dict, the keys should be targeted classes and values the desired number of samples for the class. Defaults to "auto", which will resample all classes except the majority class.
- random_state (Optional[int]): Parameter controlling randomization of the algorithm. Can be given a seed (number). Defaults to None, which randomizes the seed.

Returns:

- tuple[Union[DataFrame, ndarray], Union[Series, ndarray]]: Resampled feature matrix and target labels.

Raises:

- NonMatchingParameterLengthsException: If X and y have different length.

    Source code in eis_toolkit/training_data_tools/class_balancing.py
    @beartype\ndef balance_SMOTETomek(\n    X: Union[pd.DataFrame, np.ndarray],\n    y: Union[pd.Series, np.ndarray],\n    sampling_strategy: Union[float, str, dict] = \"auto\",\n    random_state: Optional[int] = None,\n) -> tuple[Union[pd.DataFrame, np.ndarray], Union[pd.Series, np.ndarray]]:\n    \"\"\"Balances the classes of input dataset using SMOTETomek resampling method.\n\n    Args:\n        X: The feature matrix (input data as a DataFrame).\n        y: The target labels corresponding to the feature matrix.\n        sampling_strategy: Parameter controlling how to perform the resampling.\n            If float, specifies the ratio of samples in minority class to samples of majority class,\n            if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"),\n            if dict, the keys should be targeted classes and values the desired number of samples for the class.\n            Defaults to \"auto\", which will resample all classes except the majority class.\n        random_state: Parameter controlling randomization of the algorithm. Can be given a seed (number).\n            Defaults to None, which randomizes the seed.\n\n    Returns:\n        Resampled feature matrix and target labels.\n\n    Raises:\n        NonMatchingParameterLengthsException: If X and y have different length.\n    \"\"\"\n\n    if len(X) != len(y):\n        raise exceptions.NonMatchingParameterLengthsException(\n            \"Feature matrix X and target labels y must have the same length.\"\n        )\n\n    X_res, y_res = SMOTETomek(sampling_strategy=sampling_strategy, random_state=random_state).fit_resample(X, y)\n    return X_res, y_res\n
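A short usage sketch (the data is randomly generated for illustration):

import numpy as np
from eis_toolkit.training_data_tools.class_balancing import balance_SMOTETomek

rng = np.random.default_rng(42)
X = rng.random((100, 3))
y = np.array([0] * 90 + [1] * 10)  # imbalanced labels
# Oversamples the minority class and cleans Tomek links
X_res, y_res = balance_SMOTETomek(X, y, random_state=42)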
    "},{"location":"transformations/binarize/","title":"Binarize","text":""},{"location":"transformations/binarize/#eis_toolkit.transformations.binarize.binarize","title":"binarize(raster, bands=None, thresholds=[Number], nodata=None)","text":"

    Binarize data based on a given threshold.

Replaces values less than or equal to the threshold with 0 and values greater than the threshold with 1.

    Takes one nodata value which will be re-written after transformation.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The threshold can be set for each band individually.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None thresholds Sequence[Number]

    Threshold values for transformation.

    [Number] nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    Source code in eis_toolkit/transformations/binarize.py
    @beartype\ndef binarize(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    thresholds: Sequence[Number] = [Number],\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Binarize data based on a given threshold.\n\n    Replaces values less or equal threshold with 0.\n    Replaces values greater than the threshold with 1.\n\n    Takes one nodata value which will be re-written after transformation.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The threshold can be set for each band individually.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        thresholds: Threshold values for transformation.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = cast_scalar_to_int(raster.nodata if nodata is None else nodata)\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection.\")\n\n    if check_parameter_length(bands, thresholds) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid threshold length.\")\n\n    expanded_args = expand_and_zip(bands, thresholds)\n    thresholds = [element[1] for element in expanded_args]\n\n    out_settings = {}\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        inital_dtype = band_array.dtype\n\n        band_mask = np.isin(band_array, nodata)\n        band_array = _binarize(band_array, threshold=thresholds[i])\n        band_array = np.where(band_mask, nodata, band_array)\n\n        if not check_dtype_for_int(nodata):\n            band_array = band_array.astype(inital_dtype)\n        else:\n            band_array = band_array.astype(np.min_scalar_type(nodata))\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"threshold\": thresholds[i],\n            \"nodata\": nodata,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
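    Example (an illustrative sketch; the file paths and threshold value are hypothetical):

    import rasterio
    from eis_toolkit.transformations.binarize import binarize

    with rasterio.open('elevation.tif') as raster:
        out_array, out_meta, out_settings = binarize(raster, thresholds=[500])

    # Write the binarized bands back to disk with the updated metadata.
    with rasterio.open('elevation_binary.tif', 'w', **out_meta) as dst:
        dst.write(out_array)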
    "},{"location":"transformations/clip/","title":"Clip","text":""},{"location":"transformations/clip/#eis_toolkit.transformations.clip.clip_transform","title":"clip_transform(raster, limits, bands=None, nodata=None)","text":"

    Clips data based on specified upper and lower limits.

    Takes one nodata value that will be ignored in calculations. Replaces values below the lower limit with the lower limit value and values above the upper limit with the upper limit value. Works both one-sided and two-sided but raises an error if no limits are provided.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The limits can be set for each band individually.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None limits Sequence[Tuple[Optional[Number], Optional[Number]]]

    Lower and upper limits (lower, upper) as real values.

    required nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    InvalidParameterValueException

    The input does not match the requirements (values, order of values).

    Source code in eis_toolkit/transformations/clip.py
    @beartype\ndef clip_transform(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    limits: Sequence[Tuple[Optional[Number], Optional[Number]]],\n    bands: Optional[Sequence[int]] = None,\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Clips data based on specified upper and lower limits.\n\n    Takes one nodata value that will be ignored in calculations.\n    Replaces values below the lower limit and above the upper limit with provided values, respecively.\n    Works both one-sided and two-sided but raises error if no limits provided.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The limits can be set for each band individually.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        limits: Lower and upper limits (lower, upper) as real values.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n        InvalidParameterValueException: The input does not match the requirements (values, order of values).\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection\")\n\n    if check_parameter_length(bands, limits) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid limit length.\")\n\n    for item in limits:\n        if item.count(None) == len(item):\n            raise InvalidParameterValueException(f\"Limit values all None: {item}.\")\n\n        if not check_minmax_position(item):\n            raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n    expanded_args = expand_and_zip(bands, limits)\n    limits = [element[1] for element in expanded_args]\n\n    out_settings = {}\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        inital_dtype = band_array.dtype\n\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = nodata_to_nan(band_array, nodata_value=nodata)\n\n        band_array = _clip_transform(band_array, limits=limits[i])\n\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=inital_dtype)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"limit_lower\": cast_scalar_to_int(limits[i][0]),\n            \"limit_upper\": cast_scalar_to_int(limits[i][1]),\n            \"nodata\": cast_scalar_to_int(nodata),\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), 
\"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
    "},{"location":"transformations/linear/","title":"Linear","text":""},{"location":"transformations/linear/#eis_toolkit.transformations.linear.min_max_scaling","title":"min_max_scaling(raster, bands=None, new_range=[(0, 1)], nodata=None)","text":"

    Normalize data based on a specified new range.

    Uses the provided new minimum and maximum to transform data into the new interval. Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used. The new_range can be set for each band individually. If a parameter contains only one entry, it will be applied to all bands.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None new_range Sequence[Tuple[Number, Number]]

    The new interval data will be transformed into. First value corresponds to min, second to max.

    [(0, 1)] nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    InvalidParameterValueException

    The input does not match the requirements (values, order of values).

    Source code in eis_toolkit/transformations/linear.py
    @beartype\ndef min_max_scaling(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    new_range: Sequence[Tuple[Number, Number]] = [(0, 1)],\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Normalize data based on a specified new range.\n\n    Uses the provided new minimum and maximum to transform data into the new interval.\n    Takes one nodata value that will be ignored in calculations.\n\n    If no band/column selection specified, all bands/columns will be used.\n    The new_range can be set for each band individually.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        new_range: The new interval data will be transformed into. First value corresponds to min, second to max.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n        InvalidParameterValueException: The input does not match the requirements (values, order of values).\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection\")\n\n    if check_parameter_length(bands, new_range) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid new_range length\")\n\n    for item in new_range:\n        if not check_minmax_position(item):\n            raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}\")\n\n    expanded_args = expand_and_zip(bands, new_range)\n    new_range = [element[1] for element in expanded_args]\n\n    out_settings = {}\n    out_decimals = set_max_precision()\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n        band_array = _min_max_scaling(band_array.astype(np.float64), new_range=new_range[i])\n\n        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"scaled_min\": new_range[i][0],\n            \"scaled_max\": new_range[i][1],\n            \"nodata\": nodata,\n            \"decimal_places\": out_decimals,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": 
out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
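    Example (an illustrative sketch; the input path is hypothetical):

    import rasterio
    from eis_toolkit.transformations.linear import min_max_scaling

    with rasterio.open('input.tif') as raster:
        # Scale every band into the interval [0, 1].
        out_array, out_meta, out_settings = min_max_scaling(raster, new_range=[(0, 1)])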
    "},{"location":"transformations/linear/#eis_toolkit.transformations.linear.z_score_normalization","title":"z_score_normalization(raster, bands=None, nodata=None)","text":"

    Normalize data based on mean and standard deviation.

    Results will have a mean of 0 and a standard deviation of 1. Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    Source code in eis_toolkit/transformations/linear.py
    @beartype\ndef z_score_normalization(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Normalize data based on mean and standard deviation.\n\n    Results will have a mean = 0 and standard deviation = 1.\n    Takes one nodata value that will be ignored in calculations.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection.\")\n\n    out_settings = {}\n    out_decimals = set_max_precision()\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n        band_array, mean_array, sd_array = _z_score_normalization(band_array.astype(np.float64))\n\n        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"original_mean\": truncate_decimal_places(mean_array, decimal_places=out_decimals),\n            \"original_sd\": truncate_decimal_places(sd_array, decimal_places=out_decimals),\n            \"nodata\": nodata,\n            \"decimal_places\": out_decimals,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
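    Example (an illustrative sketch; the input path is hypothetical):

    import rasterio
    from eis_toolkit.transformations.linear import z_score_normalization

    with rasterio.open('input.tif') as raster:
        # Normalize every band to mean 0 and standard deviation 1.
        out_array, out_meta, out_settings = z_score_normalization(raster)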
    "},{"location":"transformations/logarithmic/","title":"Logarithmic","text":""},{"location":"transformations/logarithmic/#eis_toolkit.transformations.logarithmic.log_transform","title":"log_transform(raster, bands=None, log_transform=['log2'], nodata=None)","text":"

    Perform a logarithmic transformation on the provided data.

    Takes one nodata value that will be ignored in calculations. Values less than or equal to zero are excluded from the transformation and replaced with the nodata value.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The log_transform can be set for each band individually.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None log_transform Sequence[str]

    The base for the logarithmic transformation. Valid values are 'ln', 'log2' and 'log10'.

    ['log2'] nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    InvalidParameterValueException

    The input does not match the requirements (values, order of values).

    Source code in eis_toolkit/transformations/logarithmic.py
    @beartype\ndef log_transform(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    log_transform: Sequence[str] = [\"log2\"],\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Perform a logarithmic transformation on the provided data.\n\n    Takes one nodata value that will be ignored in calculations.\n    Negative values will not be considered for transformation and replaced by the specific nodata value.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The log_transform can be set for each band individually.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        log_transform: The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands\n        InvalidParameterValueException: The input does not match the requirements (values, order of values)\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection\")\n\n    if check_parameter_length(bands, log_transform) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid length for log-base values.\")\n\n    for item in log_transform:\n        if not (item == \"ln\" or item == \"log2\" or item == \"log10\"):\n            raise InvalidParameterValueException(f\"Invalid method: {item}.\")\n\n    expanded_args = expand_and_zip(bands, log_transform)\n    log_transform = [element[1] for element in expanded_args]\n\n    out_settings = {}\n    out_decimals = set_max_precision()\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n        band_array[band_array <= 0] = np.nan\n\n        if log_transform[i] == \"ln\":\n            band_array = _log_transform_ln(band_array.astype(np.float64))\n        elif log_transform[i] == \"log2\":\n            band_array = _log_transform_log2(band_array.astype(np.float64))\n        elif log_transform[i] == \"log10\":\n            band_array = _log_transform_log10(band_array.astype(np.float64))\n\n        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            
\"log_transform\": log_transform[i],\n            \"nodata\": nodata,\n            \"decimal_places\": out_decimals,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
    "},{"location":"transformations/one_hot_encoding/","title":"One-hot encoding","text":""},{"location":"transformations/one_hot_encoding/#eis_toolkit.transformations.one_hot_encoding.one_hot_encode","title":"one_hot_encode(data, columns=None, drop_original_columns=True, drop_category=None, sparse_output=True, out_dtype=int, handle_unknown='infrequent_if_exist', min_frequency=None, max_categories=None)","text":"

    Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.

    This function converts categorical variables into a numeric form that can be provided to machine learning algorithms. For each unique category in a feature, a new binary column is created.

    Continuous data should not be passed to this function, to avoid creating an excessive number of binary features. If the input is a DataFrame, continuous data can be excluded from encoding by specifying the columns to encode.

    The function allows control over the handling of unknown categories, the sparsity of the output, and the data type of the encoded columns.

    Parameters:

    Name Type Description Default data Union[DataFrame, ndarray]

    Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be restricted to specified columns.

    required columns Optional[Sequence[str]]

    Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.

    None drop_original_columns bool

    If True and 'data' is a DataFrame, the original columns being encoded will be dropped from the output. Defaults to True.

    True drop_category Optional[Literal[first, if_binary]]

    Specifies a method to drop one of the categories to avoid multicollinearity. 'first' drops the first category, 'if_binary' drops one category only if the feature is binary. If None, no category is dropped. Defaults to None.

    None sparse_output bool

    Determines whether the output matrix is sparse or dense. Defaults to True (sparse).

    True out_dtype Union[type, dtype]

    Numeric data type of the output. Defaults to int.

    int handle_unknown Literal[error, ignore, infrequent_if_exist]

    Specifies how to handle unknown categories encountered during transform. 'error' raises an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent. Defaults to 'infrequent_if_exist'.

    'infrequent_if_exist' min_frequency Optional[Number]

    The minimum frequency (as a float or an int) needed to include a category in encoding. Optional parameter. Defaults to None.

    None max_categories Optional[int]

    The maximum number of categories to include in encoding. Optional parameter. Defaults to None.

    None

    Returns:

    Type Description Union[DataFrame, ndarray, csr_matrix]

    Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse) if input was a Numpy array.

    Raises:

    Type Description EmptyDataFrameException

    If the input DataFrame is empty.

    InvalidDatasetException

    If the input Numpy array is empty.

    InvalidColumnException

    If any specified column to encode does not exist in the input DataFrame.

    Source code in eis_toolkit/transformations/one_hot_encoding.py
    @beartype\ndef one_hot_encode(\n    data: Union[pd.DataFrame, np.ndarray],\n    columns: Optional[Sequence[str]] = None,\n    drop_original_columns: bool = True,\n    drop_category: Optional[Literal[\"first\", \"if_binary\"]] = None,\n    sparse_output: bool = True,\n    out_dtype: Union[type, np.dtype] = int,\n    handle_unknown: Literal[\"error\", \"ignore\", \"infrequent_if_exist\"] = \"infrequent_if_exist\",\n    min_frequency: Optional[Number] = None,\n    max_categories: Optional[int] = None,\n) -> Union[pd.DataFrame, np.ndarray, sparse._csr.csr_matrix]:\n    \"\"\"\n    Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.\n\n    This function converts categorical variables into a form that could be provided to machine learning\n    algorithms for better prediction. For each unique category in the feature, a new binary column is created.\n\n    Continuous data should not be given to this function to avoid excessive amounts of binary features. If input\n    is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.\n\n    The function allows control over aspects like handling unknown categories, controlling sparsity of the output,\n    and setting data type of the encoded columns.\n\n    Args:\n        data: Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be\n            restricted to specified columns.\n        columns: Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are\n            considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.\n        drop_original_columns: If True and 'data' is a DataFrame, the original columns being encoded will\n            be dropped from the output. Defaults to True.\n        drop_category: Specifies a method to drop one of the categories to avoid multicollinearity.\n            'first' drops the first category, 'if_binary' drops one category only if the feature is binary.\n            If None, no category is dropped. Defaults to None.\n        sparse_output: Determines whether the output matrix is sparse or dense. Defaults to True (sparse).\n        out_dtype: Numeric data type of the output. Defaults to int.\n        handle_unknown: Specifies how to handle unknown categories encountered during transform. 'error' raises\n            an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent.\n            Defaults to 'infrequent_if_exist'.\n        min_frequency: The minimum frequency (as a float or an int) needed to include a category in encoding.\n            Optional parameter. Defaults to None.\n        max_categories: The maximum number of categories to include in encoding. 
Optional parameter.\n            Defaults to None.\n\n    Returns:\n        Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse)\n            if input was a Numpy array.\n\n    Raises:\n        EmptyDataFrameException: If the input DataFrame is empty.\n        InvalidDatasetException: If the input Numpy array is empty.\n        InvalidColumnException: If any specified column to encode does not exist in the input DataFrame.\n    \"\"\"\n    is_dataframe = isinstance(data, pd.DataFrame)\n\n    if is_dataframe:\n        if data.empty:\n            raise exceptions.EmptyDataFrameException(\"Input DataFrame is empty.\")\n        df = data.copy()\n\n        if columns is not None:\n            if not check_columns_valid(df, columns):\n                raise exceptions.InvalidColumnException(\"All selected columns were not found in the input DataFrame.\")\n            transform_df = df[columns]\n        else:\n            transform_df = df\n    else:\n        if data.size == 0:\n            raise exceptions.InvalidDatasetException(\"Input array is empty.\")\n        transform_df = pd.DataFrame(data)\n\n    encoder = OneHotEncoder(\n        drop=drop_category,\n        sparse_output=sparse_output,\n        dtype=out_dtype,\n        handle_unknown=handle_unknown,\n        min_frequency=min_frequency,\n        max_categories=max_categories,\n        feature_name_combiner=lambda feature, category: str(feature) + \"_\" + str(category),\n    )\n\n    # Transform selected columns\n    encoded_data = encoder.fit_transform(transform_df)\n    encoded_cols = encoder.get_feature_names_out(transform_df.columns)\n\n    # If input was a DataFrame, create output DataFrame\n    if is_dataframe:\n        if sparse_output:\n            encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_data, columns=encoded_cols, index=df.index)\n        else:\n            encoded_df = pd.DataFrame(encoded_data, columns=encoded_cols, index=df.index)\n\n        if drop_original_columns:\n            df = df.drop(transform_df.columns, axis=1)\n\n        encoded_data = pd.concat([df, encoded_df], axis=1)\n\n    return encoded_data\n
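    Example (an illustrative sketch; the column names and values are hypothetical):

    import pandas as pd
    from eis_toolkit.transformations.one_hot_encoding import one_hot_encode

    df = pd.DataFrame({
        'lithology': ['granite', 'basalt', 'granite', 'gneiss'],  # categorical
        'grade': [1.2, 3.4, 2.2, 0.8],  # continuous, excluded from encoding
    })
    # Encode only the categorical column; return a dense DataFrame.
    encoded_df = one_hot_encode(df, columns=['lithology'], sparse_output=False)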
    "},{"location":"transformations/sigmoid/","title":"Sigmoid","text":""},{"location":"transformations/sigmoid/#eis_toolkit.transformations.sigmoid.sigmoid_transform","title":"sigmoid_transform(raster, bands=None, bounds=[(0, 1)], slope=[1], center=True, nodata=None)","text":"

    Transform data into a sigmoid-shape based on a specified new range.

    Uses the provided new minimum and maximum, shift and slope parameters to transform the data. Takes one nodata value that will be ignored in calculations.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The bounds and slope values can be set for each band individually.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None bounds Sequence[Tuple[Number, Number]]

    Boundaries for the calculation of the sigmoid function (lower, upper).

    [(0, 1)] slope Sequence[Number]

    Value which modifies the slope of the resulting sigmoid-curve.

    [1] center bool

    Center array values around mean = 0 before sigmoid transformation.

    True nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    InvalidParameterValueException

    The input does not match the requirements (values, order of values).

    Source code in eis_toolkit/transformations/sigmoid.py
    @beartype\ndef sigmoid_transform(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    bands: Optional[Sequence[int]] = None,\n    bounds: Sequence[Tuple[Number, Number]] = [(0, 1)],\n    slope: Sequence[Number] = [1],\n    center: bool = True,\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Transform data into a sigmoid-shape based on a specified new range.\n\n    Uses the provided new minimum and maximum, shift and slope parameters to transform the data.\n    Takes one nodata value that will be ignored in calculations.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The bounds and slope values can be set for each band individually.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        bounds: Boundaries for the calculation of the sigmoid function (lower, upper).\n        slope: Value which modifies the slope of the resulting sigmoid-curve.\n        center: Center array values around mean = 0 before sigmoid transformation.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n        InvalidParameterValueException: The input does not match the requirements (values, order of values)\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection\")\n\n    for parameter_name, parameter in [(\"bounds\", bounds), (\"slope\", slope)]:\n        if check_parameter_length(bands, parameter) is False:\n            raise NonMatchingParameterLengthsException(f\"Invalid length for {parameter_name}.\")\n\n    for item in bounds:\n        if check_minmax_position(item) is False:\n            raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n    expanded_args = expand_and_zip(bands, bounds, slope)\n    bounds = [element[1] for element in expanded_args]\n    slope = [element[2] for element in expanded_args]\n\n    out_settings = {}\n    out_decimals = set_max_precision()\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n        band_array = _sigmoid_transform(band_array.astype(np.float64), bounds=bounds[i], slope=slope[i], center=center)\n\n        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        
current_settings = {\n            \"band_origin\": bands[i],\n            \"bound_lower\": truncate_decimal_places(bounds[i][0], decimal_places=out_decimals),\n            \"bound_upper\": truncate_decimal_places(bounds[i][1], decimal_places=out_decimals),\n            \"slope\": slope[i],\n            \"center\": center,\n            \"nodata\": nodata,\n            \"decimal_places\": out_decimals,\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
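    Example (an illustrative sketch; the input path is hypothetical):

    import rasterio
    from eis_toolkit.transformations.sigmoid import sigmoid_transform

    with rasterio.open('input.tif') as raster:
        # Squash all bands into (0, 1) with a unit-slope sigmoid, centering first.
        out_array, out_meta, out_settings = sigmoid_transform(raster, bounds=[(0, 1)], slope=[1])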
    "},{"location":"transformations/winsorize/","title":"Winsorize","text":""},{"location":"transformations/winsorize/#eis_toolkit.transformations.winsorize.winsorize","title":"winsorize(raster, percentiles, bands=None, inside=False, nodata=None)","text":"

    Winsorize data based on specified percentile values.

    Takes one nodata value that will be ignored in calculations. Replaces values in the intervals [minimum, lower percentile] and [upper percentile, maximum], if provided. Works both one-sided and two-sided but raises an error if no percentile values are provided.

    Percentiles are symmetric, i.e. percentile_lower = 10 corresponds to the interval [min, 10%] and percentile_upper = 10 to the interval [90%, max]. Hence percentile_lower = 0 refers to the data minimum and percentile_upper = 0 to the data maximum.

    The calculation of percentiles is ambiguous: users can choose whether the replacement value is taken from inside or outside of the respective interval. Example: given np.array([5, 10, 12, 15, 20, 24, 27, 30, 35]) and percentiles (10, 10), the calculated percentiles are (5, 35) for inside and (10, 30) for outside. This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.

    If no band/column selection is specified, all bands/columns will be used. If a parameter contains only one entry, it will be applied to all bands. The percentiles can be set for each band individually, but the inside parameter is the same for all bands.

    Parameters:

    Name Type Description Default raster DatasetReader

    Data object to be transformed.

    required bands Optional[Sequence[int]]

    Selection of bands to be transformed.

    None percentiles Sequence[Tuple[Optional[Number], Optional[Number]]]

    Lower and upper percentile values (lower, upper) between [0, 100].

    required inside bool

    Whether to use the value for replacement from the left or right of the calculated percentile.

    False nodata Optional[Number]

    Nodata value to be considered.

    None

    Returns:

    Name Type Description out_array ndarray

    The transformed data.

    out_meta dict

    Updated metadata.

    out_settings dict

    Log of input settings and calculated statistics if available.

    Raises:

    Type Description InvalidRasterBandException

    The input contains invalid band numbers.

    NonMatchingParameterLengthsException

    The input does not match the number of selected bands.

    InvalidParameterValueException

    The input does not match the requirements (values, order of values).

    Source code in eis_toolkit/transformations/winsorize.py
    @beartype\ndef winsorize(  # type: ignore[no-any-unimported]\n    raster: rasterio.io.DatasetReader,\n    percentiles: Sequence[Tuple[Optional[Number], Optional[Number]]],\n    bands: Optional[Sequence[int]] = None,\n    inside: bool = False,\n    nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n    \"\"\"\n    Winsorize data based on specified percentile values.\n\n    Takes one nodata value that will be ignored in calculations.\n    Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided.\n    Works both one-sided and two-sided but raises error if no percentile values provided.\n\n    Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%].\n    And percentile_upper = 10 corresponds to the intervall [90%, max].\n    I.e. percentile_lower = 0 refers to the minimum and percentile_upper = 0 to the data maximum.\n\n    Calculation of percentiles is ambiguous. Users can choose whether to use the value\n    for replacement from inside or outside of the respective interval. Example:\n    Given the np.array[5 10 12 15 20 24 27 30 35] and percentiles(10, 10), the calculated\n    percentiles are (5, 35) for inside and (10, 30) for outside.\n    This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.\n\n    If no band/column selection specified, all bands/columns will be used.\n    If a parameter contains only 1 entry, it will be applied for all bands.\n    The percentiles can be set for each band individually, but inside parameter is same for all bands.\n\n    Args:\n        raster: Data object to be transformed.\n        bands: Selection of bands to be transformed.\n        percentiles: Lower and upper percentile values (lower, upper) between [0, 100].\n        inside: Whether to use the value for replacement from the left or right of the calculated percentile.\n        nodata: Nodata value to be considered.\n\n    Returns:\n        out_array: The transformed data.\n        out_meta: Updated metadata.\n        out_settings: Log of input settings and calculated statistics if available.\n\n    Raises:\n        InvalidRasterBandException: The input contains invalid band numbers.\n        NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n        InvalidParameterValueException: The input does not match the requirements (values, order of values)\n    \"\"\"\n    bands = list(range(1, raster.count + 1)) if bands is None else bands\n    nodata = raster.nodata if nodata is None else nodata\n\n    if check_raster_bands(raster, bands) is False:\n        raise InvalidRasterBandException(\"Invalid band selection\")\n\n    if check_parameter_length(bands, percentiles) is False:\n        raise NonMatchingParameterLengthsException(\"Invalid length for percentiles.\")\n\n    for item in percentiles:\n        if item.count(None) == len(item):\n            raise InvalidParameterValueException(f\"Percentile values all None: {item}.\")\n\n        if None not in item and sum(item) >= 100:\n            raise InvalidParameterValueException(f\"Sum >= 100: {item}.\")\n\n        if item[0] is not None and not (0 < item[0] < 100):\n            raise InvalidParameterValueException(f\"Invalid lower percentile value: {item}.\")\n\n        if item[1] is not None and not (0 < item[1] < 100):\n            raise InvalidParameterValueException(f\"Invalid upper percentile value: {item}.\")\n\n    expanded_args = expand_and_zip(bands, percentiles)\n    
percentiles = [element[1] for element in expanded_args]\n\n    out_settings = {}\n\n    for i in range(0, len(bands)):\n        band_array = raster.read(bands[i])\n        inital_dtype = band_array.dtype\n\n        band_array = cast_array_to_float(band_array, cast_int=True)\n        band_array = nodata_to_nan(band_array, nodata_value=nodata)\n\n        band_array, calculated_lower, calculated_upper = _winsorize(\n            band_array, percentiles=percentiles[i], inside=inside\n        )\n\n        band_array = nan_to_nodata(band_array, nodata_value=nodata)\n        band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=inital_dtype)\n\n        band_array = np.expand_dims(band_array, axis=0)\n\n        if i == 0:\n            out_array = band_array.copy()\n        else:\n            out_array = np.vstack((out_array, band_array))\n\n        current_transform = f\"transformation {i + 1}\"\n        current_settings = {\n            \"band_origin\": bands[i],\n            \"percentile_lower\": cast_scalar_to_int(percentiles[i][0]),\n            \"percentile_upper\": cast_scalar_to_int(percentiles[i][1]),\n            \"calculated_lower\": cast_scalar_to_int(calculated_lower),\n            \"calculated_upper\": cast_scalar_to_int(calculated_upper),\n            \"nodata\": cast_scalar_to_int(nodata),\n        }\n\n        out_settings[current_transform] = current_settings\n\n    out_meta = raster.meta.copy()\n    out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n    return out_array, out_meta, out_settings\n
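    Example (an illustrative sketch; the input path and percentiles are hypothetical):

    import rasterio
    from eis_toolkit.transformations.winsorize import winsorize

    with rasterio.open('input.tif') as raster:
        # Winsorize the lower and upper 10% tails of every band.
        out_array, out_meta, out_settings = winsorize(raster, percentiles=[(10, 10)])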
    "},{"location":"validation/calculate_auc/","title":"Calculate AUC","text":""},{"location":"validation/calculate_auc/#eis_toolkit.validation.calculate_auc.calculate_auc","title":"calculate_auc(x_values, y_values)","text":"

    Calculate area under curve (AUC).

    Calculates the AUC for a curve. The x-axis should be either proportion of area or false positive rate. The y-axis should always be true positive rate. AUC is calculated with sklearn.metrics.auc, which uses the trapezoidal rule.

    Parameters:

    Name Type Description Default x_values Union[ndarray, Series]

    Either proportion of area or false positive rate values.

    required y_values Union[ndarray, Series]

    True positive rate values.

    required

    Returns:

    Type Description float

    The area under curve.

    Raises:

    Type Description InvalidParameterValueException

    x_values or y_values are out of bounds.

    Source code in eis_toolkit/validation/calculate_auc.py
    @beartype\ndef calculate_auc(x_values: Union[np.ndarray, pd.Series], y_values: Union[np.ndarray, pd.Series]) -> float:\n    \"\"\"Calculate area under curve (AUC).\n\n    Calculates AUC for curve. X-axis should be either proportion of area ore false positive rate. Y-axis should be\n    always true positive rate. AUC is calculated with sklearn.metrics.auc which uses trapezoidal rule for calculation.\n\n    Args:\n        x_values: Either proportion of area or false positive rate values.\n        y_values: True positive rate values.\n\n    Returns:\n        The area under curve.\n\n    Raises:\n        InvalidParameterValueException: x_values or y_values are out of bounds.\n    \"\"\"\n    if x_values.max() > 1 or x_values.min() < 0:\n        raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n    if y_values.max() > 1 or y_values.min() < 0:\n        raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n    auc_value = _calculate_auc(x_values=x_values, y_values=y_values)\n    return auc_value\n
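    Example (an illustrative sketch with made-up curve values):

    import numpy as np
    from eis_toolkit.validation.calculate_auc import calculate_auc

    proportion_of_area = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    true_positive_rate = np.array([0.0, 0.6, 0.8, 0.9, 1.0])
    auc = calculate_auc(proportion_of_area, true_positive_rate)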
    "},{"location":"validation/calculate_base_metrics/","title":"Calculate base metrics","text":""},{"location":"validation/calculate_base_metrics/#eis_toolkit.validation.calculate_base_metrics.calculate_base_metrics","title":"calculate_base_metrics(raster, deposits, band=1, negatives=None)","text":"

    Calculate true positive rate, proportion of area and false positive rate values for different thresholds.

    The function calculates true positive rate, proportion of area and false positive rate values for different thresholds, which are determined from the input deposit locations and the mineral prospectivity map. Note that the calculation of false positive rate is optional and is only done if negative point locations are provided.

    Parameters:

    Name Type Description Default raster DatasetReader

    Mineral prospectivity map or evidence layer.

    required deposits GeoDataFrame

    Mineral deposit locations as points.

    required band int

    Band index of the mineral prospectivity map. Defaults to 1.

    1 negatives Optional[GeoDataFrame]

    Negative locations as points.

    None

    Returns:

    Type Description DataFrame

    DataFrame containing true positive rate, proportion of area, threshold values and false positive rate (optional) values.

    Raises:

    Type Description NonMatchingCrsException

    The raster and point data are not in the same CRS.

    NotApplicableGeometryTypeException

    The input geometries contain non-point features.

    Source code in eis_toolkit/validation/calculate_base_metrics.py
    @beartype\ndef calculate_base_metrics(\n    raster: rasterio.io.DatasetReader,\n    deposits: geopandas.GeoDataFrame,\n    band: int = 1,\n    negatives: Optional[geopandas.GeoDataFrame] = None,\n) -> pd.DataFrame:\n    \"\"\"Calculate true positive rate, proportion of area and false positive rate values for different thresholds.\n\n    Function calculates true positive rate, proportion of area and false positive rate values for different thresholds\n    which are determined from inputted deposit locations and mineral prospectivity map. Note that calculation of false\n    positive rate is optional and is only done if negative point locations are provided.\n\n    Args:\n        raster: Mineral prospectivity map or evidence layer.\n        deposits: Mineral deposit locations as points.\n        band: Band index of the mineral prospectivity map. Defaults to 1.\n        negatives: Negative locations as points.\n\n    Returns:\n        DataFrame containing true positive rate, proportion of area, threshold values and false positive\n            rate (optional) values.\n\n    Raises:\n        NonMatchingCrsException: The raster and point data are not in the same CRS.\n        NotApplicableGeometryTypeException: The input geometries contain non-point features.\n    \"\"\"\n    if negatives is not None:\n        geometries = pd.concat([deposits, negatives]).geometry\n    else:\n        geometries = deposits[\"geometry\"]\n\n    if not check_matching_crs(\n        objects=[raster, geometries],\n    ):\n        raise NonMatchingCrsException(\"The raster and deposits are not in the same CRS.\")\n\n    if not check_geometry_types(\n        geometries=geometries,\n        allowed_types=[\"Point\"],\n    ):\n        raise NotApplicableGeometryTypeException(\"The input geometries contain non-point features.\")\n\n    base_metrics = _calculate_base_metrics(raster=raster, deposits=deposits, band=band, negatives=negatives)\n\n    return base_metrics\n
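    Example (an illustrative sketch; both file paths are hypothetical):

    import geopandas as gpd
    import rasterio
    from eis_toolkit.validation.calculate_base_metrics import calculate_base_metrics

    deposits = gpd.read_file('deposits.gpkg')  # point geometries
    with rasterio.open('prospectivity.tif') as raster:
        base_metrics = calculate_base_metrics(raster, deposits)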
    "},{"location":"validation/get_pa_intersection/","title":"Get P-A plot intersection point","text":""},{"location":"validation/get_pa_intersection/#eis_toolkit.validation.get_pa_intersection.get_pa_intersection","title":"get_pa_intersection(true_positive_rate_values, proportion_of_area_values, threshold_values)","text":"

    Calculate the intersection point of the prediction rate and area curves in a prediction-area (P-A) plot.

    The threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate as the y-axis; the area curve uses the inverted proportion of area as the y-axis.

    Parameters:

    Name Type Description Default true_positive_rate_values Union[ndarray, Series]

    True positive rate values, values should be within range 0-1.

    required proportion_of_area_values Union[ndarray, Series]

    Proportion of area values, values should be within range 0-1.

    required threshold_values Union[ndarray, Series]

    Threshold values that were used to calculate true positive rate and proportion of area.

    required

    Returns:

    Type Description Tuple[float, float]

    X and y coordinates of the intersection point.

    Raises:

    Type Description InvalidParameterValueException

    true_positive_rate_values or proportion_of_area_values values are out of bounds.

    Source code in eis_toolkit/validation/get_pa_intersection.py
    @beartype\ndef get_pa_intersection(\n    true_positive_rate_values: Union[np.ndarray, pd.Series],\n    proportion_of_area_values: Union[np.ndarray, pd.Series],\n    threshold_values: Union[np.ndarray, pd.Series],\n) -> Tuple[float, float]:\n    \"\"\"Calculate the intersection point for prediction rate and area curves in (P-A plot).\n\n    Threshold_values values act as x-axis for both curves. Prediction rate curve uses true positive rate for y-axis.\n    Area curve uses inverted proportion of area as y-axis.\n\n    Args:\n        true_positive_rate_values: True positive rate values, values should be within range 0-1.\n        proportion_of_area_values: Proportion of area values, values should be within range 0-1.\n        threshold_values: Threshold values that were used to calculate true positive rate and proportion of area.\n\n    Returns:\n        X and y coordinates of the intersection point.\n\n    Raises:\n        InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.\n    \"\"\"\n    if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n        raise InvalidParameterValueException(\"true_positive_rate_values values should be within range 0-1\")\n\n    if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n        raise InvalidParameterValueException(\"proportion_of_area_values values should be within range 0-1\")\n\n    intersection = _get_pa_intersection(\n        true_positive_rate_values=true_positive_rate_values,\n        proportion_of_area_values=proportion_of_area_values,\n        threshold_values=threshold_values,\n    )\n\n    return intersection.x, intersection.y\n
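    Example (an illustrative sketch with made-up curve values):

    import numpy as np
    from eis_toolkit.validation.get_pa_intersection import get_pa_intersection

    thresholds = np.linspace(1.0, 0.0, 5)
    true_positive_rate = np.array([0.0, 0.4, 0.7, 0.9, 1.0])
    proportion_of_area = np.array([0.0, 0.15, 0.35, 0.6, 1.0])
    x, y = get_pa_intersection(true_positive_rate, proportion_of_area, thresholds)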
    "},{"location":"validation/plot_correlation_matrix/","title":"Plot correlation matrix","text":""},{"location":"validation/plot_correlation_matrix/#eis_toolkit.validation.plot_correlation_matrix.plot_correlation_matrix","title":"plot_correlation_matrix(matrix, annotate=True, cmap=None, plot_title=None, **kwargs)","text":"

    Create a Seaborn heatmap to visualize a correlation matrix.

    Parameters:

    Name Type Description Default matrix DataFrame

    Correlation matrix as a DataFrame.

    required annotate bool

    Whether the plot squares should display the correlation values. Defaults to True.

    True cmap Optional[ListedColormap]

    Colormap for plotting. Optional parameter. Defaults to None, in which case a default colormap is used.

    None plot_title Optional[str]

    Title of the plot. Optional parameter, defaults to None (no title).

    None **kwargs dict

    Additional parameters to pass to Seaborn and matplotlib.

    {}

    Returns:

    Type Description Axes

    Matplotlib axes object with the produced plot.

    Raises:

    Type Description EmptyDataFrameException

    Input matrix is empty.

    Source code in eis_toolkit/validation/plot_correlation_matrix.py
    def plot_correlation_matrix(\n    matrix: pd.DataFrame,\n    annotate: bool = True,\n    cmap: Optional[matplotlib.colors.ListedColormap] = None,\n    plot_title: Optional[str] = None,\n    **kwargs: dict\n) -> matplotlib.axes.Axes:\n    \"\"\"\n    Create a Seaborn heatmap to visualize correlation matrix.\n\n    Args:\n        matrix: Correlation matrix as a DataFrame.\n        annotate: If plot squares should display the correlation values. Defaults to True.\n        cmap: Colormap for plotting. Optional parameter. Defaults to None, in which\n            case a default colormap is used.\n        plot_title: Title of the plot. Optional parameter, defaults to none (no title).\n        **kwargs: Additional parameters to pass to Seaborn and matplotlib.\n\n    Returns:\n        Matplotlib axes object with the produced plot.\n\n    Raises:\n        EmptyDataFrameException: Input matrix is empty.\n    \"\"\"\n    if matrix.empty:\n        raise exceptions.EmptyDataFrameException(\"Input matrix DataFrame is empty.\")\n\n    # Mask for the upper triangle of the heatmap\n    mask = np.triu(np.ones_like(matrix, dtype=bool))\n\n    if cmap is None:\n        # Generate a default diverging colormap\n        cmap = sns.diverging_palette(230, 20, as_cmap=True)\n\n    ax = sns.heatmap(\n        matrix,\n        mask=mask,\n        cmap=cmap,\n        vmax=0.3,\n        center=0,\n        square=True,\n        linewidths=0.5,\n        annot=annotate,\n        cbar_kws={\"shrink\": 0.5},\n        **kwargs\n    )\n    if plot_title is not None:\n        ax.set_title(plot_title)\n\n    return ax\n
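    Example (an illustrative sketch; the random data and column names are hypothetical):

    import numpy as np
    import pandas as pd
    from eis_toolkit.validation.plot_correlation_matrix import plot_correlation_matrix

    rng = np.random.default_rng(0)
    df = pd.DataFrame(rng.normal(size=(50, 3)), columns=['cu', 'au', 'zn'])
    ax = plot_correlation_matrix(df.corr(), plot_title='Element correlations')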
    "},{"location":"validation/plot_prediction_area_curves/","title":"Plot prediction-area (P-A) curves","text":""},{"location":"validation/plot_prediction_area_curves/#eis_toolkit.validation.plot_prediction_area_curves.plot_prediction_area_curves","title":"plot_prediction_area_curves(true_positive_rate_values, proportion_of_area_values, threshold_values)","text":"

    Plot prediction-area (P-A) plot.

Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See e.g., Yousefi and Carranza (2015).

Parameters:

true_positive_rate_values (Union[ndarray, Series], required): True positive rate values.
proportion_of_area_values (Union[ndarray, Series], required): Proportion of area values.
threshold_values (Union[ndarray, Series], required): Threshold values.

Returns:

Figure: P-A plot figure object.

Raises:

InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values are out of bounds.

    References

    Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.

    Source code in eis_toolkit/validation/plot_prediction_area_curves.py
    @beartype\ndef plot_prediction_area_curves(\n    true_positive_rate_values: Union[np.ndarray, pd.Series],\n    proportion_of_area_values: Union[np.ndarray, pd.Series],\n    threshold_values: Union[np.ndarray, pd.Series],\n) -> matplotlib.figure.Figure:\n    \"\"\"Plot prediction-area (P-A) plot.\n\n    Plots prediction area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See e.g.,\n    Yousefi and Carranza (2015).\n\n    Args:\n        true_positive_rate_values: True positive rate values.\n        proportion_of_area_values: Proportion of area values.\n        threshold_values: Threshold values.\n\n    Returns:\n        P-A plot figure object.\n\n    Raises:\n        InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.\n\n    References:\n        Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral\n        prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.\n    \"\"\"\n    if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n        raise InvalidParameterValueException(\"true_positive_rate values should be within range 0-1\")\n\n    if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n        raise InvalidParameterValueException(\"proportion_of_area values should be within range 0-1\")\n\n    fig = _plot_prediction_area_curves(\n        true_positive_rate_values=true_positive_rate_values,\n        proportion_of_area_values=proportion_of_area_values,\n        threshold_values=threshold_values,\n    )\n    return fig\n
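A minimal sketch using the same kind of synthetic curves as above; the inputs are placeholders, not real validation results:

import numpy as np
from eis_toolkit.validation.plot_prediction_area_curves import plot_prediction_area_curves

thresholds = np.linspace(0.0, 1.0, 11)
fig = plot_prediction_area_curves(
    true_positive_rate_values=1.0 - thresholds**2,  # synthetic rate curve
    proportion_of_area_values=1.0 - thresholds,     # synthetic area curve
    threshold_values=thresholds,
)
fig.savefig("pa_plot.png")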
    "},{"location":"validation/plot_rate_curve/","title":"Plot rate curve","text":""},{"location":"validation/plot_rate_curve/#eis_toolkit.validation.plot_rate_curve.plot_rate_curve","title":"plot_rate_curve(x_values, y_values, plot_type='success_rate')","text":"

    Plot success rate, prediction rate or ROC curve.

The plot type depends on the plot_type argument. The y-axis is always the true positive rate, while the x-axis is either the false positive rate (roc) or the proportion of area (success_rate and prediction_rate).

Parameters:

x_values (Union[ndarray, Series], required): False positive rate values or proportion of area values.
y_values (Union[ndarray, Series], required): True positive rate values.
plot_type (Literal['success_rate', 'prediction_rate', 'roc'], defaults to 'success_rate'): Plot type.

Returns:

Figure: Success rate, prediction rate or ROC plot figure object.

Raises:

InvalidParameterValueException: Invalid plot type.
InvalidParameterValueException: x_values or y_values are out of bounds.

    Source code in eis_toolkit/validation/plot_rate_curve.py
    @beartype\ndef plot_rate_curve(\n    x_values: Union[np.ndarray, pd.Series],\n    y_values: Union[np.ndarray, pd.Series],\n    plot_type: Literal[\"success_rate\", \"prediction_rate\", \"roc\"] = \"success_rate\",\n) -> matplotlib.figure.Figure:\n    \"\"\"Plot success rate, prediction rate or ROC curve.\n\n    Plot type depends on plot_type argument. Y-axis is always true positive rate, while x-axis can be either false\n    positive rate (roc) or proportion of area (success and prediction rate) depending on plot type.\n\n    Args:\n        x_values: False positive rate values or proportion of area values.\n        y_values: True positive rate values.\n        plot_type: Plot type. Can be either: \"success_rate\", \"prediction_rate\" or \"roc\".\n\n    Returns:\n        Success rate, prediction rate or ROC plot figure object.\n\n    Raises:\n        InvalidParameterValueException: Invalid plot type.\n        InvalidParameterValueException: x_values or y_values are out of bounds.\n    \"\"\"\n    if plot_type == \"success_rate\":\n        label = \"Success rate\"\n        xlab = \"Proportion of area\"\n    elif plot_type == \"prediction_rate\":\n        label = \"Prediction rate\"\n        xlab = \"Proportion of area\"\n    elif plot_type == \"roc\":\n        label = \"ROC\"\n        xlab = \"False positive rate\"\n    else:\n        raise InvalidParameterValueException(\"Invalid plot type\")\n\n    if x_values.max() > 1 or x_values.min() < 0:\n        raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n    if y_values.max() > 1 or y_values.min() < 0:\n        raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n    fig = _plot_rate_curve(x_values=x_values, y_values=y_values, label=label, xlab=xlab)\n\n    return fig\n
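A minimal sketch of plot_rate_curve with synthetic, in-range placeholder arrays:

import numpy as np
from eis_toolkit.validation.plot_rate_curve import plot_rate_curve

area = np.linspace(0.0, 1.0, 11)   # proportion of area (x-axis)
tpr = np.sqrt(area)                # synthetic true positive rates (y-axis)
fig = plot_rate_curve(x_values=area, y_values=tpr, plot_type="prediction_rate")
fig.savefig("prediction_rate.png")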
    "},{"location":"vector_processing/cell_based_association/","title":"Cell-Based Association","text":""},{"location":"vector_processing/cell_based_association/#eis_toolkit.vector_processing.cell_based_association.cell_based_association","title":"cell_based_association(cell_size, geodata, output_path, column=None, subset_target_attribute_values=None, add_name=None, add_buffer=None)","text":"

    Creation of CBA matrix.

Initializes a CBA matrix from a vector file. The mesh is computed from the geometries contained in this file and the given cell size. Additional vector datasets can be added to the matrix, based on targeted shapes and/or attributes.

Parameters:

cell_size (int, required): Size of the cells.
geodata (List[GeoDataFrame], required): GeoDataFrame used to create the CBA matrix. Additional GeoDataFrame(s) can be provided to add to the CBA matrix.
output_path (str, required): Name of the saved .tif file.
column (Optional[List[str]], defaults to None): Name of the column of interest. If no attribute is specified, an artificial attribute is created representing the presence or absence of the geometries of this file for each cell of the CBA grid. A categorical attribute will generate as many binary columns in the CBA matrix as there are values considered of interest (dummification); see parameter subset_target_attribute_values. Additional column(s) can be provided for each added GeoDataFrame.
subset_target_attribute_values (Optional[List[Union[None, list, str]]], defaults to None): List of values of interest of the target attribute, in case a categorical target attribute has been specified. Allows filtering a subset of relevant values. Additional values can be provided for each added GeoDataFrame.
add_name (Optional[List[Union[str, None]]], defaults to None): Name of the column(s) to add to the matrix.
add_buffer (Optional[List[Union[Number, bool]]], defaults to None): Allows the use of a buffer around shapes before the intersection with CBA cells for the added GeoDataFrame(s). This minimizes border effects or increases the number of positive samples (i.e. cells with mineralization). The size of the buffer is interpreted in the units of the CRS (for a projected CRS in meters, the value is in meters).

Returns:

GeoDataFrame: The created CBA matrix.

    Source code in eis_toolkit/vector_processing/cell_based_association.py
    @beartype\ndef cell_based_association(\n    cell_size: int,\n    geodata: List[gpd.GeoDataFrame],\n    output_path: str,\n    column: Optional[List[str]] = None,\n    subset_target_attribute_values: Optional[List[Union[None, list, str]]] = None,\n    add_name: Optional[List[Union[str, None]]] = None,\n    add_buffer: Optional[List[Union[Number, bool]]] = None,\n) -> gpd.GeoDataFrame:\n    \"\"\"Creation of CBA matrix.\n\n    Initializes a CBA matrix from a vector file. The mesh is calculated\n    according to the geometries contained in this file and the size of cells.\n    Allows to add multiple vector data to the matrix, based on targeted shapes\n    and/or attributes.\n\n    Args:\n        cell_size: Size of the cells.\n        geodata: GeoDataFrame to create the CBA matrix. Additional\n            GeoDataFrame(s) can be imputed to add to the CBA matrix.\n        output_path: Name of the saved .tif file.\n        column: Name of the column of interest. If no attribute is specified,\n            then an artificial attribute is created representing the presence\n            or absence of the geometries of this file for each cell of the CBA\n            grid. A categorical attribute will generate as many columns (binary)\n            in the CBA matrix than values considered of interest (dummification).\n            See parameter <subset_target_attribute_values>. Additional\n            column(s) can be imputed for each added GeoDataFrame(s).\n        subset_target_attribute_values: List of values of interest of the\n            target attribute, in case a categorical target attribute has been\n            specified. Allows to filter a subset of relevant values. Additional\n            values can be imputed for each added GeoDataFrame(s).\n        add_name: Name of the column(s) to add to the matrix.\n        add_buffer: Allow the use of a buffer around shapes before the\n            intersection with CBA cells for the added GeoDataFrame(s). Minimize\n            border effects or allow increasing positive samples (i.e. cells\n            with mineralization). 
The size of the buffer is computed using the\n            CRS (if projected CRS in meters: value in meters).\n\n    Returns:\n        CBA matrix is created.\n    \"\"\"\n\n    # Swapping None to list values\n    if column is None:\n        column = [\"\"]\n    if add_buffer is None:\n        add_buffer = [False]\n\n    # Consistency checks on input data\n    for frame in geodata:\n        if frame.empty:\n            raise exceptions.EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if cell_size <= 0:\n        raise exceptions.InvalidParameterValueException(\"Expected cell size to be positive and non-zero.\")\n\n    add_buffer = [False if x == 0 else x for x in add_buffer]\n    if any(num < 0 for num in add_buffer):\n        raise exceptions.InvalidParameterValueException(\"Expected buffer value to be positive, null or False.\")\n\n    for i, name in enumerate(column):\n        if column[i] == \"\":\n            if subset_target_attribute_values[i] is not None:\n                raise exceptions.InvalidParameterValueException(\"Can't use subset of values if no column is targeted.\")\n        elif column[i] not in geodata[i]:\n            raise exceptions.InvalidColumnException(\"Targeted column not found in the GeoDataFrame.\")\n\n    for i, subset in enumerate(subset_target_attribute_values):\n        if subset is not None:\n            for value in subset:\n                if value not in geodata[i][column[i]].unique():\n                    raise exceptions.InvalidParameterValueException(\n                        \"Subset of value(s) not found in the targeted column.\"\n                    )\n\n    # Computation\n    for i, data in enumerate(geodata):\n        if i == 0:\n            # Initialization of the CBA matrix\n            grid, cba = _init_from_vector_data(cell_size, geodata[0], column[0], subset_target_attribute_values[0])\n        else:\n            # If necessary, adding data to matrix\n            cba = _add_layer(\n                cba,\n                grid,\n                geodata[i],\n                column[i],\n                subset_target_attribute_values[i],\n                add_name[i - 1],\n                add_buffer[i - 1],\n            )\n\n    # Export\n    _to_raster(cba, output_path)\n\n    return cba\n
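A minimal sketch of a single-layer call; the input file name is hypothetical, and the empty column string requests the presence/absence coding described above:

import geopandas as gpd
from eis_toolkit.vector_processing.cell_based_association import cell_based_association

geodata = gpd.read_file("geology.gpkg")  # hypothetical vector layer
cba = cell_based_association(
    cell_size=1000,
    geodata=[geodata],
    output_path="cba_matrix.tif",
    column=[""],                            # no target attribute: presence/absence per cell
    subset_target_attribute_values=[None],  # no value filtering
)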
    "},{"location":"vector_processing/distance_computation/","title":"Distance computation","text":""},{"location":"vector_processing/distance_computation/#eis_toolkit.vector_processing.distance_computation.distance_computation","title":"distance_computation(raster_profile, geometries)","text":"

    Calculate distance from raster cell to nearest geometry.

Parameters:

raster_profile (Union[Profile, dict], required): The raster profile of the raster in which the distances to the nearest geometry are determined.
geometries (GeoDataFrame, required): The geometries to determine distance to.

Returns:

ndarray: A 2D numpy array with the computed distances.

    Source code in eis_toolkit/vector_processing/distance_computation.py
    @beartype\ndef distance_computation(raster_profile: Union[profiles.Profile, dict], geometries: gpd.GeoDataFrame) -> np.ndarray:\n    \"\"\"Calculate distance from raster cell to nearest geometry.\n\n    Args:\n        raster_profile: The raster profile of the raster in which the distances\n            to the nearest geometry are determined.\n        geometries: The geometries to determine distance to.\n\n    Returns:\n        A 2D numpy array with the distances computed.\n\n    \"\"\"\n    if raster_profile.get(\"crs\") != geometries.crs:\n        raise exceptions.NonMatchingCrsException(\"Expected coordinate systems to match between raster and geometries. \")\n    if geometries.shape[0] == 0:\n        raise exceptions.EmptyDataFrameException(\"Expected GeoDataFrame to not be empty.\")\n\n    raster_width = raster_profile.get(\"width\")\n    raster_height = raster_profile.get(\"height\")\n\n    if not isinstance(raster_width, int) or not isinstance(raster_height, int):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected raster_profile to contain integer width and height. {raster_profile}\"\n        )\n\n    raster_transform = raster_profile.get(\"transform\")\n\n    if not isinstance(raster_transform, transform.Affine):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected raster_profile to contain an affine transformation. {raster_profile}\"\n        )\n\n    return _distance_computation(\n        raster_width=raster_width, raster_height=raster_height, raster_transform=raster_transform, geometries=geometries\n    )\n
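A minimal sketch; the file names are hypothetical, and the raster and geometries must share the same CRS:

import geopandas as gpd
import rasterio
from eis_toolkit.vector_processing.distance_computation import distance_computation

geometries = gpd.read_file("deposits.gpkg")       # hypothetical vector data
with rasterio.open("study_area.tif") as raster:   # hypothetical raster in the same CRS
    distances = distance_computation(raster.profile, geometries)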
    "},{"location":"vector_processing/extract_shared_lines/","title":"Extract shared lines","text":""},{"location":"vector_processing/extract_shared_lines/#eis_toolkit.vector_processing.extract_shared_lines.extract_shared_lines","title":"extract_shared_lines(polygons)","text":"

    Extract shared lines/borders/edges between polygons.

Parameters:

polygons (GeoDataFrame, required): The geodataframe that contains the polygon geometries to be examined for shared lines.

Returns:

GeoDataFrame: Geodataframe containing the shared lines found between the polygons.

    Source code in eis_toolkit/vector_processing/extract_shared_lines.py
    @beartype\ndef extract_shared_lines(polygons: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n    \"\"\"Extract shared lines/borders/edges between polygons.\n\n    Args:\n        polygons: The geodataframe that contains the polygon geometries to be examined\n            for shared lines.\n\n    Returns:\n        Geodataframe containing the shared lines that were found between the polygons.\n\n    Raises:\n        EmptyDataFrameException if input geodataframe is empty.\n        InvalidParameterValueException if input geodataframe doesn't contain at least 2 polygons.\n    \"\"\"\n    if polygons.shape[0] == 0:\n        raise exceptions.EmptyDataFrameException(\"Geodataframe is empty.\")\n\n    if polygons.shape[0] < 2:\n        raise exceptions.InvalidParameterValueException(\"Expected GeoDataFrame to have at least 2 polygons.\")\n\n    shared_lines = _extract_shared_lines(polygons)\n\n    return shared_lines\n
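A minimal sketch; the polygon layer is a hypothetical file that must contain at least two polygons:

import geopandas as gpd
from eis_toolkit.vector_processing.extract_shared_lines import extract_shared_lines

polygons = gpd.read_file("map_units.gpkg")  # hypothetical polygon layer
shared_lines = extract_shared_lines(polygons)
shared_lines.to_file("shared_lines.gpkg")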
    "},{"location":"vector_processing/idw_interpolation/","title":"IDW","text":""},{"location":"vector_processing/idw_interpolation/#eis_toolkit.vector_processing.idw_interpolation.idw","title":"idw(geodataframe, target_column, resolution, extent=None, power=2)","text":"

    Calculate inverse distance weighted (IDW) interpolation.

Parameters:

geodataframe (GeoDataFrame, required): The vector dataframe to be interpolated.
target_column (str, required): The column name with values for each geometry.
resolution (Tuple[Number, Number], required): The resolution, i.e. cell size, of the output raster as (pixel_size_x, pixel_size_y).
extent (Optional[Tuple[Number, Number, Number, Number]], defaults to None): The extent of the output raster as (x_min, x_max, y_min, y_max). If None, the extent is calculated from the input vector data.
power (Number, defaults to 2): The value determining the rate at which the weights decrease. As power increases, the weights for distant points decrease rapidly.

Returns:

Tuple[ndarray, dict]: Rasterized vector data and metadata.

Raises:

EmptyDataFrameException: The input GeoDataFrame is empty.
InvalidParameterValueException: Invalid resolution or target_column.

    Source code in eis_toolkit/vector_processing/idw_interpolation.py
    @beartype\ndef idw(\n    geodataframe: gpd.GeoDataFrame,\n    target_column: str,\n    resolution: Tuple[Number, Number],\n    extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n    power: Number = 2,\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Calculate inverse distance weighted (IDW) interpolation.\n\n    Args:\n        geodataframe: The vector dataframe to be interpolated.\n        target_column: The column name with values for each geometry.\n        resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n        extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n            If None, calculate extent from the input vector data.\n        power: The value for determining the rate at which the weights decrease.\n            As power increases, the weights for distant points decrease rapidly.\n            Defaults to 2.\n\n    Returns:\n        Rasterized vector data and metadata.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterValueException: Invalid resolution or target_column.\n    \"\"\"\n\n    if geodataframe.shape[0] == 0:\n        raise EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n    if target_column not in geodataframe.columns:\n        raise InvalidParameterValueException(\n            f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n        )\n\n    if resolution[0] <= 0 or resolution[1] <= 0:\n        raise InvalidParameterValueException(\"Expected height and width greater than zero.\")\n\n    interpolated_values, out_meta = _idw_interpolation(geodataframe, target_column, resolution, power, extent)\n\n    return interpolated_values, out_meta\n
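A minimal sketch; the point layer and its value column are hypothetical, and writing the result assumes out_meta is a complete rasterio profile:

import geopandas as gpd
import rasterio
from eis_toolkit.vector_processing.idw_interpolation import idw

points = gpd.read_file("samples.gpkg")   # hypothetical point layer
grid, out_meta = idw(
    geodataframe=points,
    target_column="au_ppm",               # hypothetical value column
    resolution=(25, 25),
    power=2,
)
with rasterio.open("idw.tif", "w", **out_meta) as dst:  # assumes out_meta is a full profile
    dst.write(grid, 1)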
    "},{"location":"vector_processing/kriging_interpolation/","title":"Kriging interpolation","text":""},{"location":"vector_processing/kriging_interpolation/#eis_toolkit.vector_processing.kriging_interpolation.kriging","title":"kriging(data, target_column, resolution, extent=None, variogram_model='linear', coordinates_type='geographic', method='ordinary')","text":"

    Perform Kriging interpolation on the input data.

Parameters:

data (GeoDataFrame, required): GeoDataFrame containing the input data.
target_column (str, required): The column name with values for each geometry.
resolution (Tuple[Number, Number], required): The resolution, i.e. cell size, of the output raster as (pixel_size_x, pixel_size_y).
extent (Optional[Tuple[Number, Number, Number, Number]], defaults to None): The extent of the output raster as (x_min, x_max, y_min, y_max). If None, the extent is calculated from the input vector data.
variogram_model (Literal['linear', 'power', 'gaussian', 'spherical', 'exponential'], defaults to 'linear'): Variogram model to be used.
coordinates_type (Literal['euclidean', 'geographic'], defaults to 'geographic'): Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic'). Used only in ordinary kriging.
method (Literal['ordinary', 'universal'], defaults to 'ordinary'): Ordinary or universal kriging.

Returns:

Tuple[ndarray, dict]: Grid containing the interpolated values and metadata.

Raises:

EmptyDataFrameException: The input GeoDataFrame is empty.
InvalidParameterValueException: Target column name is invalid or resolution is not greater than zero.

    Source code in eis_toolkit/vector_processing/kriging_interpolation.py
    @beartype\ndef kriging(\n    data: gpd.GeoDataFrame,\n    target_column: str,\n    resolution: Tuple[Number, Number],\n    extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n    variogram_model: Literal[\"linear\", \"power\", \"gaussian\", \"spherical\", \"exponential\"] = \"linear\",\n    coordinates_type: Literal[\"euclidean\", \"geographic\"] = \"geographic\",\n    method: Literal[\"ordinary\", \"universal\"] = \"ordinary\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"\n    Perform Kriging interpolation on the input data.\n\n    Args:\n        data: GeoDataFrame containing the input data.\n        target_column: The column name with values for each geometry.\n        resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n        extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n            If None, calculate extent from the input vector data.\n        variogram_model: Variogram model to be used.\n            Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.\n        coordinates_type: Determines are coordinates on a plane ('euclidean') or a sphere ('geographic').\n            Used only in ordinary kriging. Defaults to 'geographic'.\n        method: Ordinary or universal kriging. Defaults to 'ordinary'.\n\n    Returns:\n        Grid containing the interpolated values and metadata.\n\n    Raises:\n        EmptyDataFrameException: The input GeoDataFrame is empty.\n        InvalidParameterValueException: Target column name is invalid or resolution is not greater than zero.\n    \"\"\"\n\n    if data.empty:\n        raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n    if target_column not in data.columns:\n        raise InvalidParameterValueException(\n            f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n        )\n\n    if resolution[0] <= 0 or resolution[1] <= 0:\n        raise InvalidParameterValueException(\"The resolution must be greater than zero.\")\n\n    data_interpolated, out_meta = _kriging(\n        data, target_column, resolution, extent, variogram_model, coordinates_type, method\n    )\n\n    return data_interpolated, out_meta\n
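A minimal sketch with hypothetical inputs; 'euclidean' is chosen here on the assumption that the data are in a projected CRS:

import geopandas as gpd
from eis_toolkit.vector_processing.kriging_interpolation import kriging

points = gpd.read_file("samples.gpkg")   # hypothetical point layer
grid, out_meta = kriging(
    data=points,
    target_column="au_ppm",               # hypothetical value column
    resolution=(25, 25),
    variogram_model="spherical",
    coordinates_type="euclidean",
    method="ordinary",
)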
    "},{"location":"vector_processing/rasterize_vector/","title":"Rasterize vector","text":""},{"location":"vector_processing/rasterize_vector/#eis_toolkit.vector_processing.rasterize_vector.rasterize_vector","title":"rasterize_vector(geodataframe, resolution=None, value_column=None, default_value=1.0, fill_value=0.0, base_raster_profile=None, buffer_value=None, merge_strategy='replace')","text":"

    Transform vector data into raster data.

Parameters:

geodataframe (GeoDataFrame, required): The vector dataframe to be rasterized.
resolution (Optional[float], defaults to None): The resolution, i.e. cell size, of the output raster. Optional if base_raster_profile is given.
value_column (Optional[str], defaults to None): The column name with values for each geometry. If None, default_value is used for all geometries.
default_value (float, defaults to 1.0): Default value burned into raster cells based on geometries.
fill_value (float, defaults to 0.0): Value used outside the burned/rasterized geometry cells.
base_raster_profile (Optional[Union[Profile, dict]], defaults to None): Base raster profile used for determining the grid on which vectors are burned in. If None, the geometries and the provided resolution value are used to compute the grid.
buffer_value (Optional[float], defaults to None): For adding a buffer around passed geometries before rasterization.
merge_strategy (Literal['replace', 'add'], defaults to 'replace'): How to handle overlapping geometries. "add" sums the values of overlapping geometries, while "replace" does not. Summing is the basis for density computations: with a default value of 1.0, the sum in each cell is the count of intersecting geometries.

Returns:

Tuple[ndarray, dict]: Rasterized vector data and metadata.

    Source code in eis_toolkit/vector_processing/rasterize_vector.py
    @beartype\ndef rasterize_vector(\n    geodataframe: gpd.GeoDataFrame,\n    resolution: Optional[float] = None,\n    value_column: Optional[str] = None,\n    default_value: float = 1.0,\n    fill_value: float = 0.0,\n    base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n    buffer_value: Optional[float] = None,\n    merge_strategy: Literal[\"replace\", \"add\"] = \"replace\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Transform vector data into raster data.\n\n    Args:\n        geodataframe: The vector dataframe to be rasterized.\n        resolution: The resolution i.e. cell size of the output raster.\n            Optional if base_raster_profile is given.\n        value_column: The column name with values for each geometry.\n            If None, then default_value is used for all geometries.\n        default_value: Default value burned into raster cells based on geometries.\n        base_raster_profile: Base raster profile\n            to be used for determining the grid on which vectors are\n            burned in. If None, the geometries and provided resolution\n            value are used to compute grid.\n        fill_value: Value used outside the burned/rasterized geometry cells.\n        buffer_value: For adding a buffer around passed\n            geometries before rasterization.\n        merge_strategy: How to handle overlapping geometries.\n            \"add\" causes overlapping geometries to add together the\n            values while \"replace\" does not. Adding them together is the\n            basis for density computations where the density can be\n            calculated by using a default value of 1.0 and the sum in\n            each cell is the count of intersecting geometries.\n\n    Returns:\n        Rasterized vector data and metadata.\n    \"\"\"\n\n    if geodataframe.shape[0] == 0:\n        # Empty GeoDataFrame\n        raise exceptions.EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n    if resolution is None and base_raster_profile is None:\n        raise exceptions.InvalidParameterValueException(\n            \"Expected either resolution or base_raster_profile to be given.\"\n        )\n    if resolution is not None and resolution <= 0:\n        raise exceptions.NumericValueSignException(\n            f\"Expected a positive value resolution ({dict(resolution=resolution)})\"\n        )\n    if value_column is not None and value_column not in geodataframe.columns:\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected value_column ({value_column}) to be contained in geodataframe columns.\"\n        )\n    if buffer_value is not None and buffer_value < 0:\n        raise exceptions.NumericValueSignException(\n            f\"Expected a positive buffer_value ({dict(buffer_value=buffer_value)})\"\n        )\n\n    if base_raster_profile is not None and not isinstance(base_raster_profile, (profiles.Profile, dict)):\n        raise exceptions.InvalidParameterValueException(\n            f\"Expected base_raster_profile ({type(base_raster_profile)}) to be dict or rasterio.profiles.Profile.\"\n        )\n\n    if buffer_value is not None:\n        geodataframe = geodataframe.copy()\n        geodataframe[\"geometry\"] = geodataframe[\"geometry\"].apply(lambda geom: geom.buffer(buffer_value))\n\n    return _rasterize_vector(\n        geodataframe=geodataframe,\n        value_column=value_column,\n        default_value=default_value,\n        fill_value=fill_value,\n        
base_raster_profile=base_raster_profile,\n        resolution=resolution,\n        merge_alg=getattr(MergeAlg, merge_strategy),\n    )\n
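A minimal sketch with a hypothetical input layer; merge_strategy="add" makes overlapping geometries accumulate, which is the counting behaviour described above:

import geopandas as gpd
from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector

geodataframe = gpd.read_file("faults.gpkg")  # hypothetical vector layer
out_image, out_meta = rasterize_vector(
    geodataframe=geodataframe,
    resolution=10.0,
    default_value=1.0,
    fill_value=0.0,
    merge_strategy="add",
)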
    "},{"location":"vector_processing/reproject_vector/","title":"Reproject vector","text":""},{"location":"vector_processing/reproject_vector/#eis_toolkit.vector_processing.reproject_vector.reproject_vector","title":"reproject_vector(geodataframe, target_crs)","text":"

Reprojects vector data to match the given coordinate reference system (EPSG).

Parameters:

geodataframe (GeoDataFrame, required): The vector dataframe to be reprojected.
target_crs (int, required): Target CRS as an EPSG code.

Returns:

GeoDataFrame: Reprojected vector data.

Raises:

MatchingCrsException: The vector data is already in the target CRS.

    Source code in eis_toolkit/vector_processing/reproject_vector.py
    @beartype\ndef reproject_vector(geodataframe: geopandas.GeoDataFrame, target_crs: int) -> geopandas.GeoDataFrame:\n    \"\"\"Reprojects vector data to match given coordinate reference system (EPSG).\n\n    Args:\n        geodataframe: The vector dataframe to be reprojected.\n        target_crs: Target CRS as an EPSG code.\n\n    Returns:\n        Reprojected vector data.\n    \"\"\"\n\n    if geodataframe.crs.to_epsg() == target_crs:\n        raise MatchingCrsException(\"Vector data is already in the target CRS.\")\n\n    reprojected_gdf = geodataframe.to_crs(\"epsg:\" + str(target_crs))\n    return reprojected_gdf\n
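A minimal sketch; note from the source above that a MatchingCrsException is raised if the data is already in the target CRS:

import geopandas as gpd
from eis_toolkit.vector_processing.reproject_vector import reproject_vector

geodataframe = gpd.read_file("data.gpkg")                      # hypothetical vector layer
reprojected = reproject_vector(geodataframe, target_crs=4326)  # EPSG:4326 as an example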
    "},{"location":"vector_processing/vector_density/","title":"Vector density","text":""},{"location":"vector_processing/vector_density/#eis_toolkit.vector_processing.vector_density.vector_density","title":"vector_density(geodataframe, resolution=None, base_raster_profile=None, buffer_value=None, statistic='density')","text":"

    Compute density of geometries within raster.

Parameters:

geodataframe (GeoDataFrame, required): The dataframe with the vectors whose density is computed.
resolution (Optional[float], defaults to None): The resolution, i.e. cell size, of the output raster. Optional if base_raster_profile is given.
base_raster_profile (Optional[Union[Profile, dict]], defaults to None): Base raster profile used for determining the grid on which vectors are burned in. If None, the geometries and the provided resolution value are used to compute the grid.
buffer_value (Optional[float], defaults to None): For adding a buffer around passed geometries before computing density.
statistic (Literal['density', 'count'], defaults to 'density'): As the source below shows, 'count' returns the raw cell counts, while 'density' normalizes the counts by the maximum count.

Returns:

Tuple[ndarray, dict]: Computed density of vector data and metadata.

    Source code in eis_toolkit/vector_processing/vector_density.py
    @beartype\ndef vector_density(\n    geodataframe: gpd.GeoDataFrame,\n    resolution: Optional[float] = None,\n    base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n    buffer_value: Optional[float] = None,\n    statistic: Literal[\"density\", \"count\"] = \"density\",\n) -> Tuple[np.ndarray, dict]:\n    \"\"\"Compute density of geometries within raster.\n\n    Args:\n        geodataframe: The dataframe with vectors\n            of which density is computed.\n        resolution: The resolution i.e. cell size of the output raster.\n            Optional if base_raster_profile is given.\n        base_raster_profile: Base raster profile\n            to be used for determining the grid on which vectors are\n            burned in. If None, the geometries and provided resolution\n            value are used to compute grid.\n        buffer_value: For adding a buffer around passed\n            geometries before computing density.\n\n    Returns:\n        Computed density of vector data and metadata.\n    \"\"\"\n    out_raster_array, out_metadata = rasterize_vector(\n        geodataframe=geodataframe,\n        resolution=resolution,\n        base_raster_profile=base_raster_profile,\n        buffer_value=buffer_value,\n        value_column=None,\n        default_value=1.0,\n        fill_value=0.0,\n        merge_strategy=\"add\",\n    )\n    max_count = np.max(out_raster_array)\n    if statistic == \"count\" or np.isclose(max_count, 0.0):\n        return out_raster_array, out_metadata\n    else:\n        return (out_raster_array / max_count), out_metadata\n
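A minimal sketch with a hypothetical input layer, showing both statistics:

import geopandas as gpd
from eis_toolkit.vector_processing.vector_density import vector_density

geodataframe = gpd.read_file("occurrences.gpkg")  # hypothetical vector layer
density, out_meta = vector_density(geodataframe=geodataframe, resolution=100.0, statistic="density")
counts, _ = vector_density(geodataframe=geodataframe, resolution=100.0, statistic="count")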
    "}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 16053387..6812082f 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ diff --git a/training_data_tools/class_balancing/index.html b/training_data_tools/class_balancing/index.html index 8bd362f1..4d430afd 100644 --- a/training_data_tools/class_balancing/index.html +++ b/training_data_tools/class_balancing/index.html @@ -800,6 +800,26 @@ +
diff --git a/transformations/one_hot_encoding/index.html b/transformations/one_hot_encoding/index.html
new file mode 100644
index 00000000..5244f015
--- /dev/null
+++ b/transformations/one_hot_encoding/index.html
@@ -0,0 +1,2094 @@
+        One-hot encoding - EIS Toolkit

    One-hot encoding


one_hot_encode(data, columns=None, drop_original_columns=True, drop_category=None, sparse_output=True, out_dtype=int, handle_unknown='infrequent_if_exist', min_frequency=None, max_categories=None)


Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.

This function converts categorical variables into a form that can be provided to machine learning algorithms for better prediction. For each unique category in the feature, a new binary column is created.

Continuous data should not be given to this function, to avoid an excessive number of binary features. If the input is a DataFrame, continuous data can be excluded from encoding by specifying the columns to encode.

The function allows control over aspects like handling unknown categories, controlling the sparsity of the output, and setting the data type of the encoded columns.

Parameters:
data (Union[DataFrame, ndarray], required): Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be restricted to specified columns.
columns (Optional[Sequence[str]], defaults to None): The columns to encode if 'data' is a DataFrame. If None, all columns are considered for encoding. Ignored if 'data' is a Numpy array.
drop_original_columns (bool, defaults to True): If True and 'data' is a DataFrame, the original columns being encoded are dropped from the output.
drop_category (Optional[Literal['first', 'if_binary']], defaults to None): Specifies a method to drop one of the categories to avoid multicollinearity. 'first' drops the first category, 'if_binary' drops one category only if the feature is binary. If None, no category is dropped.
sparse_output (bool, defaults to True): Determines whether the output matrix is sparse or dense.
out_dtype (Union[type, dtype], defaults to int): Numeric data type of the output.
handle_unknown (Literal['error', 'ignore', 'infrequent_if_exist'], defaults to 'infrequent_if_exist'): Specifies how to handle unknown categories encountered during transform. 'error' raises an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent.
min_frequency (Optional[Number], defaults to None): The minimum frequency (as a float or an int) needed to include a category in encoding.
max_categories (Optional[int], defaults to None): The maximum number of categories to include in encoding.

    Returns:

Union[DataFrame, ndarray, csr_matrix]: Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse) if input was a Numpy array.

    Raises:

EmptyDataFrameException: The input DataFrame is empty.
InvalidDatasetException: The input Numpy array is empty.
InvalidColumnException: A specified column to encode does not exist in the input DataFrame.
    + Source code in eis_toolkit/transformations/one_hot_encoding.py +
    @beartype
    +def one_hot_encode(
    +    data: Union[pd.DataFrame, np.ndarray],
    +    columns: Optional[Sequence[str]] = None,
    +    drop_original_columns: bool = True,
    +    drop_category: Optional[Literal["first", "if_binary"]] = None,
    +    sparse_output: bool = True,
    +    out_dtype: Union[type, np.dtype] = int,
    +    handle_unknown: Literal["error", "ignore", "infrequent_if_exist"] = "infrequent_if_exist",
    +    min_frequency: Optional[Number] = None,
    +    max_categories: Optional[int] = None,
    +) -> Union[pd.DataFrame, np.ndarray, sparse._csr.csr_matrix]:
    +    """
    +    Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.
    +
    +    This function converts categorical variables into a form that could be provided to machine learning
    +    algorithms for better prediction. For each unique category in the feature, a new binary column is created.
    +
    +    Continuous data should not be given to this function to avoid excessive amounts of binary features. If input
    +    is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.
    +
    +    The function allows control over aspects like handling unknown categories, controlling sparsity of the output,
    +    and setting data type of the encoded columns.
    +
    +    Args:
    +        data: Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be
    +            restricted to specified columns.
    +        columns: Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are
    +            considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.
    +        drop_original_columns: If True and 'data' is a DataFrame, the original columns being encoded will
    +            be dropped from the output. Defaults to True.
    +        drop_category: Specifies a method to drop one of the categories to avoid multicollinearity.
    +            'first' drops the first category, 'if_binary' drops one category only if the feature is binary.
    +            If None, no category is dropped. Defaults to None.
    +        sparse_output: Determines whether the output matrix is sparse or dense. Defaults to True (sparse).
    +        out_dtype: Numeric data type of the output. Defaults to int.
    +        handle_unknown: Specifies how to handle unknown categories encountered during transform. 'error' raises
    +            an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent.
    +            Defaults to 'infrequent_if_exist'.
    +        min_frequency: The minimum frequency (as a float or an int) needed to include a category in encoding.
    +            Optional parameter. Defaults to None.
    +        max_categories: The maximum number of categories to include in encoding. Optional parameter.
    +            Defaults to None.
    +
    +    Returns:
    +        Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse)
    +            if input was a Numpy array.
    +
    +    Raises:
    +        EmptyDataFrameException: If the input DataFrame is empty.
    +        InvalidDatasetException: If the input Numpy array is empty.
    +        InvalidColumnException: If any specified column to encode does not exist in the input DataFrame.
    +    """
    +    is_dataframe = isinstance(data, pd.DataFrame)
    +
    +    if is_dataframe:
    +        if data.empty:
    +            raise exceptions.EmptyDataFrameException("Input DataFrame is empty.")
    +        df = data.copy()
    +
    +        if columns is not None:
    +            if not check_columns_valid(df, columns):
    +                raise exceptions.InvalidColumnException("All selected columns were not found in the input DataFrame.")
    +            transform_df = df[columns]
    +        else:
    +            transform_df = df
    +    else:
    +        if data.size == 0:
    +            raise exceptions.InvalidDatasetException("Input array is empty.")
    +        transform_df = pd.DataFrame(data)
    +
    +    encoder = OneHotEncoder(
    +        drop=drop_category,
    +        sparse_output=sparse_output,
    +        dtype=out_dtype,
    +        handle_unknown=handle_unknown,
    +        min_frequency=min_frequency,
    +        max_categories=max_categories,
    +        feature_name_combiner=lambda feature, category: str(feature) + "_" + str(category),
    +    )
    +
    +    # Transform selected columns
    +    encoded_data = encoder.fit_transform(transform_df)
    +    encoded_cols = encoder.get_feature_names_out(transform_df.columns)
    +
    +    # If input was a DataFrame, create output DataFrame
    +    if is_dataframe:
    +        if sparse_output:
    +            encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_data, columns=encoded_cols, index=df.index)
    +        else:
    +            encoded_df = pd.DataFrame(encoded_data, columns=encoded_cols, index=df.index)
    +
    +        if drop_original_columns:
    +            df = df.drop(transform_df.columns, axis=1)
    +
    +        encoded_data = pd.concat([df, encoded_df], axis=1)
    +
    +    return encoded_data
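A minimal usage sketch (not part of the rendered page); the toy DataFrame is made up, and the continuous column is deliberately left out of the encoding:

import pandas as pd
from eis_toolkit.transformations.one_hot_encoding import one_hot_encode

df = pd.DataFrame({
    "lithology": ["granite", "basalt", "granite", "gabbro"],  # categorical column
    "au_ppm": [0.2, 1.4, 0.3, 2.2],                           # continuous, excluded below
})
encoded = one_hot_encode(df, columns=["lithology"], sparse_output=False)
# 'au_ppm' is kept as-is; binary columns lithology_basalt, lithology_gabbro
# and lithology_granite are appended.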
\ No newline at end of file
diff --git a/validation/calculate_auc/index.html b/validation/calculate_auc/index.html
index 9f5bae75..e92314d3 100644
--- a/validation/calculate_auc/index.html
+++ b/validation/calculate_auc/index.html
@@ -1564,7 +1624,7 @@

    ndarray + Union[ndarray, Series]
    @@ -1578,7 +1638,7 @@

    ndarray + Union[ndarray, Series]
    @@ -1642,10 +1702,7 @@

    Source code in eis_toolkit/validation/calculate_auc.py -
    13
    -14
    -15
    -16
    +            
    16
     17
     18
     19
    @@ -1666,8 +1723,11 @@ 

    34 35 36 -37

    @beartype
    -def calculate_auc(x_values: np.ndarray, y_values: np.ndarray) -> float:
    +37
    +38
    +39
    +40
    @beartype
    +def calculate_auc(x_values: Union[np.ndarray, pd.Series], y_values: Union[np.ndarray, pd.Series]) -> float:
         """Calculate area under curve (AUC).
     
    Calculates AUC for a curve. X-axis should be either proportion of area or false positive rate. Y-axis should be
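Given the widened signature above, a minimal sketch of calling calculate_auc with synthetic in-range curves:

import numpy as np
from eis_toolkit.validation.calculate_auc import calculate_auc

area = np.linspace(0.0, 1.0, 11)   # synthetic proportion-of-area values
tpr = np.sqrt(area)                # synthetic true positive rates
auc = calculate_auc(x_values=area, y_values=tpr)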
diff --git a/validation/get_pa_intersection/index.html b/validation/get_pa_intersection/index.html
index 7ffb3e6a..53b20928 100644
--- a/validation/get_pa_intersection/index.html
+++ b/validation/get_pa_intersection/index.html
@@ -1564,7 +1624,7 @@

    ndarray + Union[ndarray, Series]

  • @@ -1578,7 +1638,7 @@

    ndarray + Union[ndarray, Series]

    @@ -1592,7 +1652,7 @@

    ndarray + Union[ndarray, Series]

    @@ -1656,12 +1716,7 @@

    Source code in eis_toolkit/validation/get_pa_intersection.py -
    20
    -21
    -22
    -23
    -24
    -25
    +            
    25
     26
     27
     28
    @@ -1688,9 +1743,18 @@ 

    49 50 51 -52

    @beartype
    +52
    +53
    +54
    +55
    +56
    +57
    +58
    +59
    @beartype
     def get_pa_intersection(
    -    true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray
    +    true_positive_rate_values: Union[np.ndarray, pd.Series],
    +    proportion_of_area_values: Union[np.ndarray, pd.Series],
    +    threshold_values: Union[np.ndarray, pd.Series],
     ) -> Tuple[float, float]:
         """Calculate the intersection point for prediction rate and area curves in (P-A plot).
     
diff --git a/validation/plot_prediction_area_curves/index.html b/validation/plot_prediction_area_curves/index.html
index 46a329e8..c8d5400d 100644
--- a/validation/plot_prediction_area_curves/index.html
+++ b/validation/plot_prediction_area_curves/index.html
@@ -1564,7 +1624,7 @@

    ndarray + Union[ndarray, Series]

  • @@ -1578,7 +1638,7 @@

    ndarray + Union[ndarray, Series]

    @@ -1592,7 +1652,7 @@

    ndarray + Union[ndarray, Series]

    @@ -1661,12 +1721,7 @@

    Source code in eis_toolkit/validation/plot_prediction_area_curves.py -
    41
    -42
    -43
    -44
    -45
    -46
    +            
    46
     47
     48
     49
    @@ -1696,9 +1751,18 @@ 

    73 74 75 -76

    @beartype
    +76
    +77
    +78
    +79
    +80
    +81
    +82
    +83
    @beartype
     def plot_prediction_area_curves(
    -    true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray
    +    true_positive_rate_values: Union[np.ndarray, pd.Series],
    +    proportion_of_area_values: Union[np.ndarray, pd.Series],
    +    threshold_values: Union[np.ndarray, pd.Series],
     ) -> matplotlib.figure.Figure:
         """Plot prediction-area (P-A) plot.
     
    diff --git a/validation/plot_rate_curve/index.html b/validation/plot_rate_curve/index.html
    index 6aa16f67..794b76af 100644
    --- a/validation/plot_rate_curve/index.html
    +++ b/validation/plot_rate_curve/index.html
@@ -1564,7 +1624,7 @@

    ndarray + Union[ndarray, Series]

  • @@ -1578,7 +1638,7 @@

    ndarray + Union[ndarray, Series]

    @@ -1592,7 +1652,7 @@

    Literal['success_rate', 'prediction_rate', 'roc']

    @@ -1666,12 +1726,7 @@

    Source code in eis_toolkit/validation/plot_rate_curve.py -
    27
    -28
    -29
    -30
    -31
    -32
    +            
    32
     33
     34
     35
    @@ -1709,11 +1764,16 @@ 

    67 68 69 -70

    @beartype
    +70
    +71
    +72
    +73
    +74
    +75
    @beartype
     def plot_rate_curve(
    -    x_values: np.ndarray,
    -    y_values: np.ndarray,
    -    plot_type: str = "success_rate",
    +    x_values: Union[np.ndarray, pd.Series],
    +    y_values: Union[np.ndarray, pd.Series],
    +    plot_type: Literal["success_rate", "prediction_rate", "roc"] = "success_rate",
     ) -> matplotlib.figure.Figure:
         """Plot success rate, prediction rate or ROC curve.
     
    diff --git a/vector_processing/cell_based_association/index.html b/vector_processing/cell_based_association/index.html
    index 65dcf903..d435c546 100644
    --- a/vector_processing/cell_based_association/index.html
    +++ b/vector_processing/cell_based_association/index.html
@@ -1704,10 +1764,7 @@

    Source code in eis_toolkit/vector_processing/cell_based_association.py -
     18
    - 19
    - 20
    - 21
    +            
     21
      22
      23
      24
    @@ -1801,7 +1858,10 @@ 

    112 113 114 -115

    @beartype
    +115
    +116
    +117
    +118
    @beartype
     def cell_based_association(
         cell_size: int,
         geodata: List[gpd.GeoDataFrame],
    diff --git a/vector_processing/distance_computation/index.html b/vector_processing/distance_computation/index.html
    index 46c84bb4..bf00db09 100644
    --- a/vector_processing/distance_computation/index.html
    +++ b/vector_processing/distance_computation/index.html
    @@ -1617,7 +1677,16 @@

Source code in eis_toolkit/vector_processing/distance_computation.py
(line-number gutter: the function shifted from lines 21-56 to 12-47)

@@ -1643,16 +1712,7 @@
 @beartype
     def distance_computation(raster_profile: Union[profiles.Profile, dict], geometries: gpd.GeoDataFrame) -> np.ndarray:
         """Calculate distance from raster cell to nearest geometry.
     
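A minimal usage sketch for the signature above; the file names are hypothetical stand-ins, and only the parameter and return types come from the diff:

```python
import geopandas as gpd
import rasterio
from eis_toolkit.vector_processing.distance_computation import distance_computation

# Hypothetical inputs: any raster profile defining the target grid, plus a vector layer.
with rasterio.open("study_area.tif") as raster:
    profile = raster.profile  # rasterio Profile, one of the accepted types

geometries = gpd.read_file("deposits.gpkg")

# Returns an array with the distance from each raster cell to the nearest geometry.
distances = distance_computation(profile, geometries)
print(distances.shape)
```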
    diff --git a/vector_processing/extract_shared_lines/index.html b/vector_processing/extract_shared_lines/index.html
    new file mode 100644
    index 00000000..75336633
    --- /dev/null
    +++ b/vector_processing/extract_shared_lines/index.html
    @@ -0,0 +1,1775 @@
+Extract shared lines - EIS Toolkit
    Extract shared lines

extract_shared_lines(polygons)

    Extract shared lines/borders/edges between polygons.


    Parameters:

Name: polygons
Type: GeoDataFrame
Description: The geodataframe that contains the polygon geometries to be examined for shared lines.
Default: required

    Returns:

Type: GeoDataFrame
Description: Geodataframe containing the shared lines that were found between the polygons.
Source code in eis_toolkit/vector_processing/extract_shared_lines.py (lines 30-53)

+@beartype
    +def extract_shared_lines(polygons: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    +    """Extract shared lines/borders/edges between polygons.
    +
    +    Args:
    +        polygons: The geodataframe that contains the polygon geometries to be examined
    +            for shared lines.
    +
    +    Returns:
    +        Geodataframe containing the shared lines that were found between the polygons.
    +
    +    Raises:
    +        EmptyDataFrameException if input geodataframe is empty.
    +        InvalidParameterValueException if input geodataframe doesn't contain at least 2 polygons.
    +    """
    +    if polygons.shape[0] == 0:
    +        raise exceptions.EmptyDataFrameException("Geodataframe is empty.")
    +
    +    if polygons.shape[0] < 2:
    +        raise exceptions.InvalidParameterValueException("Expected GeoDataFrame to have at least 2 polygons.")
    +
    +    shared_lines = _extract_shared_lines(polygons)
    +
    +    return shared_lines
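
A minimal sketch exercising the new function with invented geometries: two adjacent unit squares whose only shared border is the vertical edge at x = 1. Only the function itself and its exceptions come from the source above.

```python
import geopandas as gpd
from shapely.geometry import box
from eis_toolkit.vector_processing.extract_shared_lines import extract_shared_lines

# Two adjacent unit squares; they touch along the segment from (1, 0) to (1, 1).
polygons = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)])

shared = extract_shared_lines(polygons)
print(shared.geometry)  # expected: the shared edge as a line geometry

# A single-polygon input raises InvalidParameterValueException, and an
# empty GeoDataFrame raises EmptyDataFrameException (see docstring above).
```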
\ No newline at end of file
diff --git a/vector_processing/idw_interpolation/index.html b/vector_processing/idw_interpolation/index.html
index da61d3f5..34e1a0c2 100644
--- a/vector_processing/idw_interpolation/index.html
+++ b/vector_processing/idw_interpolation/index.html
@@ -1695,7 +1755,18 @@

Source code in eis_toolkit/vector_processing/idw_interpolation.py
(line-number gutter: the function shifted from lines 79-120 to 68-109)

@@ -1725,18 +1796,7 @@
 @beartype
     def idw(
         geodataframe: gpd.GeoDataFrame,
         target_column: str,
    diff --git a/vector_processing/kriging_interpolation/index.html b/vector_processing/kriging_interpolation/index.html
    index 3e051b41..586169a3 100644
    --- a/vector_processing/kriging_interpolation/index.html
    +++ b/vector_processing/kriging_interpolation/index.html
(navigation-only changes: new sidebar entries for Unique combinations in rasters, One-hot encoding, and Extract shared lines)

diff --git a/vector_processing/rasterize_vector/index.html b/vector_processing/rasterize_vector/index.html
index 7ed609de..50ca530a 100644
--- a/vector_processing/rasterize_vector/index.html
+++ b/vector_processing/rasterize_vector/index.html
(navigation-only changes: new sidebar entries for Unique combinations in rasters, One-hot encoding, and Extract shared lines)

diff --git a/vector_processing/reproject_vector/index.html b/vector_processing/reproject_vector/index.html
index 2fadec0d..35a17262 100644
--- a/vector_processing/reproject_vector/index.html
+++ b/vector_processing/reproject_vector/index.html
(navigation-only changes: new sidebar entries for Unique combinations in rasters, One-hot encoding, and Extract shared lines)

diff --git a/vector_processing/vector_density/index.html b/vector_processing/vector_density/index.html
index 9609221c..dcac0cfc 100644
--- a/vector_processing/vector_density/index.html
+++ b/vector_processing/vector_density/index.html
(navigation-only changes: new sidebar entries for Unique combinations in rasters, One-hot encoding, and Extract shared lines)