diff --git a/search/search_index.json b/search/search_index.json
index 9fa1bb68..b6009a75 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"General","text":"This is the documentation site of the eis_toolkit python package. Here you can find documentation for each module. The documentation is automatically generated from docstrings.
Development of eis_toolkit is related to EIS Horizon EU project.
"},{"location":"dependency_licenses/","title":"Dependency licenses","text":"Name Version License protobuf 3.19.4 3-Clause BSD License tensorboard-plugin-wit 1.8.1 Apache 2.0 absl-py 1.2.0 Apache Software License flatbuffers 1.12 Apache Software License ghp-import 2.1.0 Apache Software License google-auth 2.11.0 Apache Software License google-auth-oauthlib 0.4.6 Apache Software License google-pasta 0.2.0 Apache Software License grpcio 1.48.1 Apache Software License importlib-metadata 4.12.0 Apache Software License keras 2.9.0 Apache Software License libclang 14.0.6 Apache Software License requests 2.28.1 Apache Software License rsa 4.9 Apache Software License tenacity 8.2.2 Apache Software License tensorboard 2.9.1 Apache Software License tensorboard-data-server 0.6.1 Apache Software License tensorflow 2.9.2 Apache Software License tensorflow-estimator 2.9.0 Apache Software License tensorflow-io-gcs-filesystem 0.26.0 Apache Software License watchdog 2.1.9 Apache Software License packaging 21.3 Apache Software License; BSD License python-dateutil 2.8.2 Apache Software License; BSD License affine 2.3.1 BSD cligj 0.7.2 BSD geopandas 0.11.1 BSD Fiona 1.8.21 BSD License Jinja2 3.1.2 BSD License Markdown 3.3.7 BSD License MarkupSafe 2.1.1 BSD License Pygments 2.13.0 BSD License Shapely 1.8.4 BSD License Werkzeug 2.2.2 BSD License astunparse 1.6.3 BSD License click 8.1.3 BSD License click-plugins 1.1.1 BSD License cycler 0.11.0 BSD License gast 0.4.0 BSD License h5py 3.7.0 BSD License idna 3.3 BSD License joblib 1.1.0 BSD License kiwisolver 1.4.4 BSD License mkdocs 1.3.1 BSD License numpy 1.23.2 BSD License oauthlib 3.2.0 BSD License pandas 1.4.4 BSD License patsy 0.5.2 BSD License pyasn1 0.4.8 BSD License pyasn1-modules 0.2.8 BSD License rasterio 1.3.2 BSD License requests-oauthlib 1.3.1 BSD License scikit-learn 1.1.2 BSD License scipy 1.9.1 BSD License statsmodels 0.13.2 BSD License threadpoolctl 3.1.0 BSD License wrapt 1.14.1 BSD License eis-toolkit 0.1.0 European Union Public Licence 1.2 (EUPL 1.2) Pillow 9.2.0 Historical Permission Notice and Disclaimer (HPND) opt-einsum 3.3.0 MIT snuggs 1.4.7 MIT GDAL 3.4.3 MIT License Keras-Preprocessing 1.1.2 MIT License PyYAML 6.0 MIT License attrs 22.1.0 MIT License cachetools 5.2.0 MIT License charset-normalizer 2.1.1 MIT License fonttools 4.37.1 MIT License mergedeep 1.3.4 MIT License mkdocs-material 8.4.2 MIT License mkdocs-material-extensions 1.0.3 MIT License munch 2.5.0 MIT License plotly 5.14.0 MIT License pymdown-extensions 9.5 MIT License pyparsing 3.0.9 MIT License pyproj 3.3.1 MIT License pytz 2022.2.1 MIT License pyyaml_env_tag 0.1 MIT License setuptools-scm 6.4.2 MIT License six 1.16.0 MIT License termcolor 1.1.0 MIT License tomli 2.0.1 MIT License urllib3 1.26.12 MIT License zipp 3.8.1 MIT License certifi 2022.6.15 Mozilla Public License 2.0 (MPL 2.0) matplotlib 3.5.3 Python Software Foundation License typing_extensions 4.3.0 Python Software Foundation License"},{"location":"conversions/csv_to_geodataframe/","title":"Convert csv to geodataframe","text":""},{"location":"conversions/csv_to_geodataframe/#eis_toolkit.conversions.csv_to_geodataframe.csv_to_geodataframe","title":"csv_to_geodataframe(csv, indexes, target_crs)
","text":"Read CSV file to a GeoDataFrame.
Usage of a single index expects valid WKT geometry. Usage of two indexes expects the X-coordinate of POINT feature(s) as the first index and the Y-coordinate as the second index.
Parameters:
Name Type Description Default csv
Path
Path to the .csv file to be read.
required indexes
Sequence[int]
Index(es) of the geometry column(s).
required target_crs
int
Target CRS as an EPSG code.
required Returns:
Type Description GeoDataFrame
CSV file read to a GeoDataFrame.
Source code in eis_toolkit/conversions/csv_to_geodataframe.py
@beartype\ndef csv_to_geodataframe(\n csv: Path,\n indexes: Sequence[int],\n target_crs: int,\n) -> geopandas.GeoDataFrame:\n \"\"\"\n Read CSV file to a GeoDataFrame.\n\n Usage of a single index expects valid WKT geometry.\n Usage of two indexes expects the X-coordinate of POINT feature(s) as the first index and the Y-coordinate as the second index.\n\n Args:\n csv: Path to the .csv file to be read.\n indexes: Index(es) of the geometry column(s).\n target_crs: Target CRS as an EPSG code.\n\n Returns:\n CSV file read to a GeoDataFrame.\n \"\"\"\n\n data_frame = _csv_to_geodataframe(\n csv=csv,\n indexes=indexes,\n target_crs=target_crs,\n )\n return data_frame\n
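A minimal usage sketch (the file path and column indexes are hypothetical; the import path follows the source location shown above):

```python
from pathlib import Path

from eis_toolkit.conversions.csv_to_geodataframe import csv_to_geodataframe

# Hypothetical CSV whose first two columns hold X and Y point coordinates
gdf = csv_to_geodataframe(
    csv=Path("data/deposits.csv"),
    indexes=[0, 1],   # X first, Y second
    target_crs=4326,  # EPSG code
)
print(gdf.head())
```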
"},{"location":"conversions/raster_to_dataframe/","title":"Convert raster to dataframe","text":""},{"location":"conversions/raster_to_dataframe/#eis_toolkit.conversions.raster_to_dataframe.raster_to_dataframe","title":"raster_to_dataframe(raster, bands=None, add_coordinates=False)
","text":"Convert raster to Pandas DataFrame.
If bands are not given, all bands are used for conversion. Selected bands are named based on their index, e.g. band_1, band_2, ..., band_n. If desired, image coordinates (row, col) for each pixel can be written to the dataframe by setting add_coordinates to True.
Parameters:
Name Type Description Default raster
DatasetReader
Raster to be converted.
required bands
Optional[Sequence[int]]
Selected bands from multiband raster. Indexing begins from one. Defaults to None.
None
add_coordinates
bool
Determines if pixel coordinates are written into the dataframe. Defaults to False.
False
Returns:
Type Description DataFrame
Raster converted to a DataFrame.
Source code in eis_toolkit/conversions/raster_to_dataframe.py
@beartype\ndef raster_to_dataframe(\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n add_coordinates: bool = False,\n) -> pd.DataFrame:\n \"\"\"Convert raster to Pandas DataFrame.\n\n If bands are not given, all bands are used for conversion. Selected bands are named based on their index, e.g.\n band_1, band_2, ..., band_n. If desired, image coordinates (row, col) for each pixel can be written to the\n dataframe by setting add_coordinates to True.\n\n Args:\n raster: Raster to be converted.\n bands: Selected bands from multiband raster. Indexing begins from one. Defaults to None.\n add_coordinates: Determines if pixel coordinates are written into the dataframe. Defaults to False.\n\n Returns:\n Raster converted to a DataFrame.\n \"\"\"\n\n data_frame = _raster_to_dataframe(\n raster=raster,\n bands=bands,\n add_coordinates=add_coordinates,\n )\n return data_frame\n
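A usage sketch with a hypothetical two-band raster:

```python
import rasterio

from eis_toolkit.conversions.raster_to_dataframe import raster_to_dataframe

with rasterio.open("data/evidence.tif") as raster:  # hypothetical path
    df = raster_to_dataframe(raster, bands=[1, 2], add_coordinates=True)

print(df.head())  # columns band_1, band_2 plus pixel row/col coordinates
```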
"},{"location":"exploratory_analyses/dbscan/","title":"DBSCAN","text":""},{"location":"exploratory_analyses/dbscan/#eis_toolkit.exploratory_analyses.dbscan.dbscan","title":"dbscan(data, max_distance=0.5, min_samples=5)
","text":"Perform DBSCAN clustering on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
GeoDataFrame containing the input data.
required max_distance
float
The maximum distance between two samples for one to be considered as in the neighborhood of the other. Defaults to 0.5.
0.5
min_samples
int
The number of samples in a neighborhood for a point to be considered as a core point. Defaults to 5.
5
Returns:
Type Description GeoDataFrame
GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a point is a core point (1) or not (0).
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
The maximum distance between two samples in a neighborhood is not greater than zero or the number of samples in a neighborhood is not greater than one.
Source code in eis_toolkit/exploratory_analyses/dbscan.py
@beartype\ndef dbscan(data: gdp.GeoDataFrame, max_distance: float = 0.5, min_samples: int = 5) -> gdp.GeoDataFrame:\n \"\"\"\n Perform DBSCAN clustering on the input data.\n\n Args:\n data: GeoDataFrame containing the input data.\n max_distance: The maximum distance between two samples for one to be considered as in the neighborhood of\n the other. Defaults to 0.5.\n min_samples: The number of samples in a neighborhood for a point to be considered as a core point.\n Defaults to 5.\n\n Returns:\n GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a\n point is a core point (1) or not (0).\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: The maximum distance between two samples in a neighborhood is not greater\n than zero or the number of samples in a neighborhood is not greater than one.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if max_distance <= 0:\n raise InvalidParameterValueException(\n \"The input value for the maximum distance between two samples in a neighborhood must be greater than zero.\"\n )\n\n if min_samples <= 1:\n raise InvalidParameterValueException(\n \"The input value for the minimum number of samples in a neighborhood must be greater than one.\"\n )\n\n dbscan_gdf = _dbscan(data, max_distance, min_samples)\n\n return dbscan_gdf\n
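A usage sketch on hypothetical point data (the input file is an assumption; the two added columns are named by the internal _dbscan helper):

```python
import geopandas as gpd

from eis_toolkit.exploratory_analyses.dbscan import dbscan

gdf = gpd.read_file("data/samples.gpkg")  # hypothetical point data
clustered = dbscan(gdf, max_distance=0.01, min_samples=10)
print(clustered.head())  # input columns plus cluster label and core point indicator
```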
"},{"location":"exploratory_analyses/descriptive_statistics/","title":"Descriptive statistics","text":""},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_dataframe","title":"descriptive_statistics_dataframe(input_data, column)
","text":"Generate descriptive statistics from vector data.
Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
Parameters:
Name Type Description Default input_data
Union[DataFrame, GeoDataFrame]
Data to generate descriptive statistics from.
required column
str
Specify the column to generate descriptive statistics from.
required Returns:
Type Description dict
The descriptive statistics in the order described above.
Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
@beartype\ndef descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:\n \"\"\"Generate descriptive statistics from vector data.\n\n Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n\n Args:\n input_data: Data to generate descriptive statistics from.\n column: Specify the column to generate descriptive statistics from.\n\n Returns:\n The descriptive statistics in the order described above.\n \"\"\"\n if column not in input_data.columns:\n raise InvalidColumnException\n data = input_data[column]\n statistics = _descriptive_statistics(data)\n return statistics\n
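A self-contained sketch on toy data:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_dataframe

df = pd.DataFrame({"grade": [0.1, 0.5, 0.3, 0.9, 0.7]})  # toy data
stats = descriptive_statistics_dataframe(df, column="grade")
print(stats)  # min, max, mean, quantiles, std, relative std, skewness
```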
"},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_raster","title":"descriptive_statistics_raster(input_data)
","text":"Generate descriptive statistics from raster data.
Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
Parameters:
Name Type Description Default input_data
DatasetReader
Data to generate descriptive statistics from.
required Returns:
Type Description dict
The descriptive statistics in the order described above.
Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
@beartype\ndef descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:\n \"\"\"Generate descriptive statistics from raster data.\n\n Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.\n\n Args:\n input_data: Data to generate descriptive statistics from.\n\n Returns:\n The descriptive statistics in the order described above.\n \"\"\"\n data = input_data.read().flatten()\n statistics = _descriptive_statistics(data)\n return statistics\n
"},{"location":"exploratory_analyses/feature_importance/","title":"Feature importance","text":""},{"location":"exploratory_analyses/feature_importance/#eis_toolkit.exploratory_analyses.feature_importance.evaluate_feature_importance","title":"evaluate_feature_importance(classifier, x_test, y_test, feature_names, number_of_repetition=50, random_state=0)
","text":"Evaluate the feature importance of a sklearn classifier or linear model.
Parameters:
Name Type Description Default classifier
BaseEstimator
Trained classifier.
required x_test
ndarray
Testing feature data (X data needs to be normalized / standardized).
required y_test
ndarray
Testing target data.
required feature_names
Sequence[str]
Names of the feature columns.
required number_of_repetition
int
Number of iterations used when calculating feature importance. Defaults to 50.
50
random_state
int
Random state for repeatability of results. Defaults to 0.
0
Returns: A dataframe composed of feature names and importance values, and the resulting object with importance mean, importance std, and overall importance. Raises: InvalidDatasetException: When the dataset is None.
Source code in eis_toolkit/exploratory_analyses/feature_importance.py
@beartype\ndef evaluate_feature_importance(\n classifier: sklearn.base.BaseEstimator,\n x_test: np.ndarray,\n y_test: np.ndarray,\n feature_names: Sequence[str],\n number_of_repetition: int = 50,\n random_state: int = 0,\n) -> tuple[pd.DataFrame, dict]:\n \"\"\"\n Evaluate the feature importance of a sklearn classifier or linear model.\n\n Parameters:\n classifier: Trained classifier.\n x_test: Testing feature data (X data needs to be normalized / standardized).\n y_test: Testing target data.\n feature_names: Names of the feature columns.\n number_of_repetition: Number of iterations used when calculating feature importance. Defaults to 50.\n random_state: Random state for repeatability of results. Defaults to 0.\n Returns:\n A dataframe composed of feature names and importance values.\n The resulting object with importance mean, importance std, and overall importance.\n Raises:\n InvalidDatasetException: When the dataset is None.\n \"\"\"\n\n if x_test is None or y_test is None:\n raise InvalidDatasetException\n\n result = permutation_importance(\n classifier, x_test, y_test.ravel(), n_repeats=number_of_repetition, random_state=random_state\n )\n\n feature_importance = pd.DataFrame({\"Feature\": feature_names, \"Importance\": result.importances_mean})\n\n feature_importance[\"Importance\"] = feature_importance[\"Importance\"] * 100\n feature_importance = feature_importance.sort_values(by=\"Importance\", ascending=False)\n\n return feature_importance, result\n
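A self-contained sketch using a scikit-learn classifier on toy data (the dataset and model choice are illustrative only):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from eis_toolkit.exploratory_analyses.feature_importance import evaluate_feature_importance

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))  # toy features, already standardized
y = (X[:, 0] + 0.1 * rng.normal(size=200) > 0).astype(int)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

classifier = RandomForestClassifier(random_state=0).fit(x_train, y_train)
importance_df, result = evaluate_feature_importance(
    classifier, x_test, y_test, feature_names=["a", "b", "c"], number_of_repetition=25
)
print(importance_df)  # features sorted by permutation importance (in %)
```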
"},{"location":"exploratory_analyses/k_means_cluster/","title":"K-means clustering","text":""},{"location":"exploratory_analyses/k_means_cluster/#eis_toolkit.exploratory_analyses.k_means_cluster.k_means_clustering","title":"k_means_clustering(data, number_of_clusters=None, random_state=None)
","text":"Perform k-means clustering on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
A GeoDataFrame containing the input data.
required number_of_clusters
Optional[int]
The number of clusters (>= 1) to form. Optional parameter. If not provided, the optimal number of clusters is computed using the elbow method.
None
random_state
Optional[int]
Seed for random number generation in centroid initialization, to make the randomness deterministic. Optional parameter.
None
Returns:
Type Description GeoDataFrame
GeoDataFrame containing assigned cluster labels.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
The number of clusters is less than one.
Source code in eis_toolkit/exploratory_analyses/k_means_cluster.py
@beartype\ndef k_means_clustering(\n data: gdp.GeoDataFrame, number_of_clusters: Optional[int] = None, random_state: Optional[int] = None\n) -> gdp.GeoDataFrame:\n \"\"\"\n Perform k-means clustering on the input data.\n\n Args:\n data: A GeoDataFrame containing the input data.\n number_of_clusters: The number of clusters (>= 1) to form. Optional parameter. If not provided,\n the optimal number of clusters is computed using the elbow method.\n random_state: Seed for random number generation in centroid initialization, to make\n the randomness deterministic. Optional parameter.\n\n Returns:\n GeoDataFrame containing assigned cluster labels.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: The number of clusters is less than one.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if number_of_clusters is not None and number_of_clusters < 1:\n raise InvalidParameterValueException(\"The input value for number of clusters must be at least one.\")\n\n k_means_gdf = _k_means_clustering(data, number_of_clusters, random_state)\n\n return k_means_gdf\n
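A self-contained sketch on toy point data:

```python
import geopandas as gpd
from shapely.geometry import Point

from eis_toolkit.exploratory_analyses.k_means_cluster import k_means_clustering

gdf = gpd.GeoDataFrame(
    geometry=[Point(0, 0), Point(0, 1), Point(10, 10), Point(10, 11)],  # two obvious groups
    crs="EPSG:4326",
)
clustered = k_means_clustering(gdf, number_of_clusters=2, random_state=0)
print(clustered.head())  # input data plus assigned cluster labels
```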
"},{"location":"exploratory_analyses/parallel_coordinates/","title":"Plot parallel coordinates","text":""},{"location":"exploratory_analyses/parallel_coordinates/#eis_toolkit.exploratory_analyses.parallel_coordinates.plot_parallel_coordinates","title":"plot_parallel_coordinates(df, color_column_name, plot_title=None, palette_name=None, curved_lines=True)
","text":"Plot a parallel coordinates plot.
Automatically removes all rows containing null/nan values. Tries to convert columns to numeric to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only the first 8 to plot.
Parameters:
Name Type Description Default df
DataFrame
The DataFrame to plot.
required color_column_name
str
The name of the column in df to use for color encoding.
required plot_title
Optional[str]
The title for the plot. Default is None.
None
palette_name
Optional[str]
The name of the color palette to use. Default is None.
None
curved_lines
bool
If True, the plot will have curved instead of straight lines. Default is True.
True
Returns:
Type Description Figure
A matplotlib figure containing the parallel coordinates plot.
Raises:
Type Description EmptyDataFrameException
Raised when the DataFrame is empty.
InvalidColumnException
Raised when the color column is not found in the DataFrame.
InconsistentDataTypesException
Raised when the color column has multiple data types.
Source code in eis_toolkit/exploratory_analyses/parallel_coordinates.py
@beartype\ndef plot_parallel_coordinates(\n df: pd.DataFrame,\n color_column_name: str,\n plot_title: Optional[str] = None,\n palette_name: Optional[str] = None,\n curved_lines: bool = True,\n) -> matplotlib.figure.Figure:\n \"\"\"Plot a parallel coordinates plot.\n\n Automatically removes all rows containing null/nan values. Tries to convert columns to numeric\n to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only\n the first 8 to plot.\n\n Args:\n df: The DataFrame to plot.\n color_column_name: The name of the column in df to use for color encoding.\n plot_title: The title for the plot. Default is None.\n palette_name: The name of the color palette to use. Default is None.\n curved_lines: If True, the plot will have curved instead of straight lines. Default is True.\n\n Returns:\n A matplotlib figure containing the parallel coordinates plot.\n\n Raises:\n EmptyDataFrameException: Raised when the DataFrame is empty.\n InvalidColumnException: Raised when the color column is not found in the DataFrame.\n InconsistentDataTypesException: Raised when the color column has multiple data types.\n \"\"\"\n\n if df.empty:\n raise exceptions.EmptyDataFrameException(\"The input DataFrame is empty.\")\n\n if color_column_name not in df.columns:\n raise exceptions.InvalidColumnException(\n f\"The provided color column {color_column_name} is not found in the DataFrame.\"\n )\n\n df = df.convert_dtypes()\n df = df.apply(pd.to_numeric, errors=\"ignore\")\n\n color_data = df[color_column_name].to_numpy()\n if len(set([type(elem) for elem in color_data])) != 1:\n raise exceptions.InconsistentDataTypesException(\n \"The color column should have a consistent datatype. Multiple data types detected in the color column.\"\n )\n\n df = df.select_dtypes(include=np.number)\n\n # Drop non-numeric columns and the column used for coloring\n columns_to_drop = [color_column_name]\n for column in df.columns.values:\n if df[column].isnull().all():\n columns_to_drop.append(column)\n df = df.loc[:, ~df.columns.isin(columns_to_drop)]\n\n # Keep only first 8 columns if more are still present\n if len(df.columns.values) > 8:\n df = df.iloc[:, :8]\n\n data_labels = df.columns.values\n data = df.to_numpy()\n\n fig = _plot_parallel_coordinates(\n data=data,\n data_labels=data_labels,\n color_data=color_data,\n color_column_name=color_column_name,\n plot_title=plot_title,\n palette_name=palette_name,\n curved_lines=curved_lines,\n )\n return fig\n
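A self-contained sketch on toy data:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.parallel_coordinates import plot_parallel_coordinates

df = pd.DataFrame(
    {
        "cu": [1.2, 0.4, 2.3, 0.9],
        "zn": [0.3, 0.8, 0.5, 1.1],
        "class": ["a", "a", "b", "b"],
    }
)
fig = plot_parallel_coordinates(df, color_column_name="class", plot_title="Toy data")
fig.savefig("parallel_coordinates.png")  # hypothetical output path
```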
"},{"location":"exploratory_analyses/pca/","title":"PCA","text":""},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.compute_pca","title":"compute_pca(data, number_of_components, scaler_type='standard', nodata=None, color_column_name=None)
","text":"Compute given number of principal components for numeric input data.
Various input data formats are accepted and the output format depends on the input format. If input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can be specified in this case.
Parameters:
Name Type Description Default data
Union[ndarray, DataFrame, GeoDataFrame, DatasetReader]
Input data for PCA.
required number_of_components
int
The number of principal components to compute. Should be >= 1 and at most the number of numeric columns if input is (Geo)DataFrame or number of bands if input is raster.
required scaler_type
Literal['standard', 'min_max', 'robust']
Transform data according to a specified Sklearn scaler. Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".
'standard'
nodata
Optional[Number]
Define nodata value to be masked out. Optional parameter. If None and input is raster, looks for nodata value from raster metadata. Defaults to None.
None
color_column_name
Optional[str]
If input data is a DataFrame or a GeoDataFrame, column name used for coloring data points in the produced pairplot can be defined. Defaults to None.
None
Returns:
Type Description Union[ndarray, Tuple[DataFrame, PairGrid], Tuple[GeoDataFrame, PairGrid], Tuple[ndarray, Profile]]
The computed principal components in a format corresponding to the input data (for raster, output is
ndarray
Numpy array containing the data and raster profile) and the explained variance ratios for each component.
Raises:
Type Description EmptyDataException
The input is empty.
InvalidParameterValueException
The number of principal components is less than 1 or more than the number of columns if input was (Geo)DataFrame.
Source code in eis_toolkit/exploratory_analyses/pca.py
@beartype\ndef compute_pca(\n data: Union[np.ndarray, pd.DataFrame, gpd.GeoDataFrame, rasterio.io.DatasetReader],\n number_of_components: int,\n scaler_type: Literal[\"standard\", \"min_max\", \"robust\"] = \"standard\",\n nodata: Optional[Number] = None,\n color_column_name: Optional[str] = None,\n) -> Tuple[\n Union[\n np.ndarray,\n Tuple[pd.DataFrame, sns.PairGrid],\n Tuple[gpd.GeoDataFrame, sns.PairGrid],\n Tuple[np.ndarray, rasterio.profiles.Profile],\n ],\n np.ndarray,\n]:\n \"\"\"\n Compute given number of principal components for numeric input data.\n\n Various input data formats are accepted and the output format depends on the input format. If\n input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can\n be specified in this case.\n\n Args:\n data: Input data for PCA.\n number_of_components: The number of principal components to compute. Should be >= 1 and at most\n the number of numeric columns if input is (Geo)DataFrame or number of bands if input is raster.\n scaler_type: Transform data according to a specified Sklearn scaler.\n Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".\n nodata: Define nodata value to be masked out. Optional parameter. If None and input is raster, looks\n for nodata value from raster metadata. Defaults to None.\n color_column_name: If input data is a DataFrame or a GeoDataFrame, column name used for\n coloring data points in the produced pairplot can be defined. Defaults to None.\n\n Returns:\n The computed principal components in a format corresponding to the input data (for raster, output is\n Numpy array containing the data and raster profile) and the explained variance ratios for each component.\n\n Raises:\n EmptyDataException: The input is empty.\n InvalidParameterValueException: The number of principal components is less than 1 or more than\n the number of columns if input was (Geo)DataFrame.\n \"\"\"\n if scaler_type not in SCALERS:\n raise exceptions.InvalidParameterValueException(f\"Invalid scaler. Choose from: {list(SCALERS.keys())}\")\n\n if number_of_components < 1:\n raise exceptions.InvalidParameterValueException(\"The number of principal components should be >= 1.\")\n\n # Get feature matrix (Numpy array) from various input types\n if isinstance(data, np.ndarray):\n feature_matrix = data\n if feature_matrix.ndim == 2: # Table-like data (assume it is a DataFrame transformed to Numpy array)\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=False)\n elif feature_matrix.ndim == 3: # Assume data represents multiband raster data\n rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n else:\n raise exceptions.InvalidParameterValueException(\n f\"Unsupported input data format. {feature_matrix.ndim} dimensions detected.\"\n )\n if feature_matrix.size == 0:\n raise exceptions.EmptyDataException(\"Input array is empty.\")\n\n elif isinstance(data, rasterio.io.DatasetReader):\n feature_matrix = data.read()\n if feature_matrix.ndim < 3:\n raise exceptions.InvalidParameterValueException(\"Input raster should have multiple bands.\")\n rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n if nodata is None:\n nodata = data.nodata\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n\n elif isinstance(data, pd.DataFrame):\n df = data.copy()\n if df.empty:\n raise exceptions.EmptyDataException(\"Input DataFrame is empty.\")\n if number_of_components > len(df.columns):\n raise exceptions.InvalidParameterValueException(\n \"The number of principal components should be at most the number of numeric columns in the input DataFrame.\"\n )\n if color_column_name is not None:\n color_column_data = df[color_column_name]\n\n if isinstance(data, gpd.GeoDataFrame):\n geometries = data.geometry\n crs = data.crs\n df = df.drop(columns=[\"geometry\"])\n\n df = df.convert_dtypes()\n df = df.apply(pd.to_numeric, errors=\"ignore\")\n df = df.select_dtypes(include=np.number)\n df = df.astype(dtype=np.number)\n feature_matrix = df.to_numpy()\n feature_matrix = feature_matrix.astype(float)\n feature_matrix, nan_mask = _handle_missing_values(feature_matrix, nodata)\n\n # Core PCA computation\n principal_components, explained_variances = _compute_pca(feature_matrix, number_of_components, scaler_type)\n\n # Put nodata back in and consider new dimension of data\n if nodata is not None:\n principal_components[nan_mask[:, :number_of_components]] = nodata\n else:\n principal_components[nan_mask[:, :number_of_components]] = np.nan\n\n # Convert PCA output to proper format\n if isinstance(data, np.ndarray):\n if data.ndim == 3:\n result_data = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n else:\n result_data = principal_components\n\n elif isinstance(data, rasterio.io.DatasetReader):\n principal_components = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n out_profile = data.profile.copy()\n out_profile[\"count\"] = number_of_components\n out_profile[\"dtype\"] = \"float32\"\n result_data = (principal_components, out_profile)\n\n elif isinstance(data, pd.DataFrame):\n component_names = [f\"principal_component_{i+1}\" for i in range(number_of_components)]\n pca_df = pd.DataFrame(data=principal_components, columns=component_names)\n if color_column_name is not None:\n pca_df[color_column_name] = color_column_data\n sns_pair_grid = plot_pca(pca_df, explained_variances, color_column_name)\n if isinstance(data, gpd.GeoDataFrame):\n pca_df = gpd.GeoDataFrame(pca_df, geometry=geometries, crs=crs)\n result_data = (pca_df, sns_pair_grid)\n\n return result_data, explained_variances\n
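A sketch with a 2D Numpy array input (toy data; for DataFrame or raster inputs the return format differs, as described above):

```python
import numpy as np

from eis_toolkit.exploratory_analyses.pca import compute_pca

data = np.random.default_rng(0).normal(size=(100, 3))  # 100 samples, 3 features
components, explained_variances = compute_pca(data, number_of_components=2)
print(components.shape)     # (100, 2)
print(explained_variances)  # explained variance ratio per component
```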
"},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.plot_pca","title":"plot_pca(pca_df, explained_variances=None, color_column_name=None, save_path=None)
","text":"Plot a scatter matrix of different principal component combinations.
Parameters:
Name Type Description Default pca_df
DataFrame
A DataFrame containing computed principal components.
required explained_variances
Optional[ndarray]
The explained variance ratios for each principal component. Used for labeling axes in the plot. Optional parameter. Defaults to None.
None
color_column_name
Optional[str]
Name of the column that will be used for color-coding data points. Typically a categorical variable in the original data. Optional parameter, no colors if not provided. Defaults to None.
None
save_path
Optional[str]
The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.
None
Returns:
Type Description PairGrid
A Seaborn pairgrid containing the PCA scatter matrix.
Raises:
Type Description InvalidColumnException
DataFrame does not contain the given color column.
Source code in eis_toolkit/exploratory_analyses/pca.py
@beartype\ndef plot_pca(\n pca_df: pd.DataFrame,\n explained_variances: Optional[np.ndarray] = None,\n color_column_name: Optional[str] = None,\n save_path: Optional[str] = None,\n) -> sns.PairGrid:\n \"\"\"Plot a scatter matrix of different principal component combinations.\n\n Args:\n pca_df: A DataFrame containing computed principal components.\n explained_variances: The explained variance ratios for each principal component. Used for labeling\n axes in the plot. Optional parameter. Defaults to None.\n color_column_name: Name of the column that will be used for color-coding data points. Typically a\n categorical variable in the original data. Optional parameter, no colors if not provided.\n Defaults to None.\n save_path: The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.\n\n Returns:\n A Seaborn pairgrid containing the PCA scatter matrix.\n\n Raises:\n InvalidColumnException: DataFrame does not contain the given color column.\n \"\"\"\n\n if color_column_name and color_column_name not in pca_df.columns:\n raise exceptions.InvalidColumnException(\"DataFrame does not contain the given color column.\")\n\n pair_grid = sns.pairplot(pca_df, hue=color_column_name)\n\n # Add explained variances to axis labels if provided\n if explained_variances is not None:\n labels = [f\"PC {i+1} ({var:.1f}%)\" for i, var in enumerate(explained_variances * 100)]\n else:\n labels = [f\"PC {i+1}\" for i in range(len(pair_grid.axes))]\n\n # Iterate over axes objects and set the labels\n for i, ax_row in enumerate(pair_grid.axes):\n for j, ax in enumerate(ax_row):\n if j == 0: # Only the first column\n ax.set_ylabel(labels[i], fontsize=\"large\")\n if i == len(ax_row) - 1: # Only the last row\n ax.set_xlabel(labels[j], fontsize=\"large\")\n\n if save_path is not None:\n plt.savefig(save_path)\n\n return pair_grid\n
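A sketch with toy principal component data (column names and variance values are illustrative):

```python
import numpy as np
import pandas as pd

from eis_toolkit.exploratory_analyses.pca import plot_pca

pca_df = pd.DataFrame(
    np.random.default_rng(0).normal(size=(50, 2)), columns=["pc1", "pc2"]
)
grid = plot_pca(pca_df, explained_variances=np.array([0.7, 0.2]))
grid.savefig("pca_scatter_matrix.png")  # hypothetical output path
```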
"},{"location":"exploratory_analyses/statistical_testing/","title":"Statistical (hypothesis) testing","text":""},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.chi_square_test","title":"chi_square_test(data, target_column, columns=None)
","text":"Compute Chi-square test for independence on the input data.
It is assumed that the observations in the input data are independent and that the variables are categorical, i.e. strings, booleans or integers, but not floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required target_column
str
Variable against which independence of other variables is tested.
required columns
Optional[Sequence[str]]
Variables that are tested against the variable in target_column. If None, every column is used.
None
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
The target_column is not in input Dataframe or invalid column is provided.
Returns:
Type Description dict
Test statistics for each variable (except target_column).
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef chi_square_test(data: pd.DataFrame, target_column: str, columns: Optional[Sequence[str]] = None) -> dict:\n \"\"\"Compute Chi-square test for independence on the input data.\n\n It is assumed that the observations in the input data are independent and that the variables are categorical,\n i.e. strings, booleans or integers, but not floats.\n\n Args:\n data: Dataframe containing the input data.\n target_column: Variable against which independence of other variables is tested.\n columns: Variables that are tested against the variable in target_column. If None, every column is used.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: The target_column is not in input Dataframe or invalid column is provided.\n\n Returns:\n Test statistics for each variable (except target_column).\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if not check_columns_valid(data, target_column):\n raise exceptions.InvalidParameterValueException(\"Target column not found in the Dataframe.\")\n\n if columns is not None:\n invalid_columns = [column for column in columns if column not in data.columns]\n if any(invalid_columns):\n raise exceptions.InvalidParameterValueException(\n f\"The following variables are not in the dataframe: {invalid_columns}\"\n )\n else:\n columns = data.columns\n\n statistics = {}\n for column in columns:\n if column != target_column:\n contingency_table = pd.crosstab(data[target_column], data[column])\n chi_square, p_value, degrees_of_freedom, _ = chi2_contingency(contingency_table)\n statistics[column] = (chi_square, p_value, degrees_of_freedom)\n\n return statistics\n
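A self-contained sketch on toy categorical data:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import chi_square_test

df = pd.DataFrame(
    {
        "deposit": [1, 1, 0, 0, 1, 0, 1, 0],
        "lithology": ["granite", "granite", "basalt", "basalt", "granite", "basalt", "basalt", "granite"],
    }
)
stats = chi_square_test(df, target_column="deposit")
print(stats)  # {"lithology": (chi_square, p_value, degrees_of_freedom)}
```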
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.correlation_matrix","title":"correlation_matrix(data, correlation_method='pearson', min_periods=None)
","text":"Compute correlation matrix on the input data.
It is assumed that the data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required correlation_method
Literal[pearson, kendall, spearman]
'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.
'pearson'
min_periods
Optional[int]
Minimum number of observations required per pair of columns to have valid result. Optional.
None
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
min_periods argument is used with method 'kendall'.
Returns:
Type Description DataFrame
Dataframe containing the correlation matrix.
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef correlation_matrix(\n data: pd.DataFrame,\n correlation_method: Literal[\"pearson\", \"kendall\", \"spearman\"] = \"pearson\",\n min_periods: Optional[int] = None,\n) -> pd.DataFrame:\n \"\"\"Compute correlation matrix on the input data.\n\n It is assumed that the data is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n correlation_method: 'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.\n min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: min_periods argument is used with method 'kendall'.\n\n Returns:\n Dataframe containing the correlation matrix\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if correlation_method == \"kendall\" and min_periods is not None:\n raise exceptions.InvalidParameterValueException(\n \"The argument min_periods is available only with correlation methods 'pearson' and 'spearman'.\"\n )\n\n matrix = data.corr(method=correlation_method, min_periods=min_periods)\n\n return matrix\n
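A self-contained sketch on toy numeric data:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import correlation_matrix

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [2.1, 3.9, 6.2, 8.0]})
print(correlation_matrix(df, correlation_method="spearman"))
```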
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.covariance_matrix","title":"covariance_matrix(data, min_periods=None, delta_degrees_of_freedom=1)
","text":"Compute covariance matrix on the input data.
It is assumed that the data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required min_periods
Optional[int]
Minimum number of observations required per pair of columns to have valid result. Optional.
None
delta_degrees_of_freedom
int
Delta degrees of freedom used for computing covariance matrix. Defaults to 1.
1
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
Provided value for delta_degrees_of_freedom is negative.
Returns:
Type Description DataFrame
Dataframe containing the covariance matrix.
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef covariance_matrix(\n data: pd.DataFrame, min_periods: Optional[int] = None, delta_degrees_of_freedom: int = 1\n) -> pd.DataFrame:\n \"\"\"Compute covariance matrix on the input data.\n\n It is assumed that the data is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n delta_degrees_of_freedom: Delta degrees of freedom used for computing covariance matrix. Defaults to 1.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.\n\n Returns:\n Dataframe containing the covariance matrix\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if delta_degrees_of_freedom < 0:\n raise exceptions.InvalidParameterValueException(\"Delta degrees of freedom must be non-negative.\")\n\n matrix = data.cov(min_periods=min_periods, ddof=delta_degrees_of_freedom)\n\n return matrix\n
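A self-contained sketch on toy numeric data:

```python
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import covariance_matrix

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [2.1, 3.9, 6.2, 8.0]})
print(covariance_matrix(df, delta_degrees_of_freedom=1))
```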
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.normality_test","title":"normality_test(data)
","text":"Compute Shapiro-Wilk test for normality on the input data.
The test evaluates the null hypothesis that the input data is normally distributed. It is assumed that the input data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required Returns:
Type Description dict
Test statistics for each variable.
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef normality_test(data: pd.DataFrame) -> dict:\n \"\"\"Compute Shapiro-Wilk test for normality on the input data.\n\n The test evaluates the null hypothesis that the input data is normally distributed. It is assumed that\n the input data is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n\n Returns:\n Test statistics for each variable.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n statistics = {}\n for column in data.columns:\n statistic, p_value = shapiro(data[column])\n statistics[column] = (statistic, p_value)\n\n return statistics\n
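A self-contained sketch on toy data:

```python
import numpy as np
import pandas as pd

from eis_toolkit.exploratory_analyses.statistical_tests import normality_test

df = pd.DataFrame({"grade": np.random.default_rng(0).normal(size=100)})
print(normality_test(df))  # {"grade": (statistic, p_value)}
```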
"},{"location":"prediction/fuzzy_overlay/","title":"Fuzzy overlay","text":""},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.and_overlay","title":"and_overlay(data)
","text":"Compute an 'and' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef and_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute an 'and' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.min(axis=0)\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.gamma_overlay","title":"gamma_overlay(data, gamma)
","text":"Compute a 'gamma' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required gamma
float
The gamma parameter. With gamma value 0, the result will be the same as the 'product' overlay. When gamma is closer to 1, the weight of the 'sum' overlay is increased. Value must be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values or gamma are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef gamma_overlay(data: np.ndarray, gamma: float) -> np.ndarray:\n \"\"\"Compute a 'gamma' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n gamma: The gamma parameter. With gamma value 0, the result will be the same as the 'product' overlay.\n When gamma is closer to 1, the weight of the 'sum' overlay is increased.\n Value must be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values or gamma are not in range [0, 1].\n \"\"\"\n if gamma < 0 or gamma > 1:\n raise exceptions.InvalidParameterValueException(\"The gamma parameter must be in range [0, 1]\")\n\n sum = sum_overlay(data=data)\n product = product_overlay(data=data)\n return product ** (1 - gamma) * sum**gamma\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.or_overlay","title":"or_overlay(data)
","text":"Compute an 'or' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef or_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute an 'or' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.max(axis=0)\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.product_overlay","title":"product_overlay(data)
","text":"Compute a 'product' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef product_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute a 'product' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return np.prod(data, axis=0)\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.sum_overlay","title":"sum_overlay(data)
","text":"Compute a 'sum' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef sum_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute a 'sum' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.sum(axis=0) - np.prod(data, axis=0)\n
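A self-contained sketch combining the overlay operations on two stacked 2x2 fuzzy membership bands (toy values):

```python
import numpy as np

from eis_toolkit.prediction.fuzzy_overlay import (
    and_overlay,
    gamma_overlay,
    or_overlay,
    product_overlay,
    sum_overlay,
)

# Two 2x2 membership bands stacked into a 3D array; all values in [0, 1]
data = np.array(
    [
        [[0.2, 0.8], [0.5, 1.0]],
        [[0.6, 0.4], [0.9, 0.3]],
    ]
)
print(and_overlay(data))               # pixel-wise minimum
print(or_overlay(data))                # pixel-wise maximum
print(product_overlay(data))           # pixel-wise product
print(sum_overlay(data))               # fuzzy algebraic sum
print(gamma_overlay(data, gamma=0.5))  # product**(1 - gamma) * sum**gamma
```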
"},{"location":"prediction/weights_of_evidence/","title":"Weights of evidence","text":""},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_responses","title":"weights_of_evidence_calculate_responses(output_arrays, nr_of_deposits, nr_of_pixels)
","text":"Calculate the posterior probabilities for the given generalized weight arrays.
Parameters:
Name Type Description Default output_arrays
Sequence[Dict[str, ndarray]]
List of output array dictionaries returned by weights of evidence calculations. For each dictionary, generalized weight and generalized standard deviation arrays are used and summed together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found, the W+ and S_W+ arrays are used (i.e., when outputs from unique weight calculations are used with this function).
required nr_of_deposits
int
Number of deposit pixels in the input data for weights of evidence calculations.
required nr_of_pixels
int
Number of evidence pixels in the input data for weights of evidence calculations.
required Returns:
Type Description ndarray
Array of posterior probabilities.
ndarray
Array of standard deviations in the posterior probability calculations.
ndarray
Array of confidence of the prospectivity values obtained in the posterior probability array.
Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_responses(\n output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int\n) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:\n \"\"\"Calculate the posterior probabilities for the given generalized weight arrays.\n\n Args:\n output_arrays: List of output array dictionaries returned by weights of evidence calculations.\n For each dictionary, generalized weight and generalized standard deviation arrays are used and summed\n together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,\n the W+ and S_W+ arrays are used (i.e., when outputs from unique weight calculations are used with this function).\n nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.\n nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.\n\n Returns:\n Array of posterior probabilities.\n Array of standard deviations in the posterior probability calculations.\n Array of confidence of the prospectivity values obtained in the posterior probability array.\n \"\"\"\n gen_weights_sum = sum(\n [\n item[GENERALIZED_WEIGHT_PLUS_COLUMN]\n if GENERALIZED_WEIGHT_PLUS_COLUMN in item.keys()\n else item[WEIGHT_PLUS_COLUMN]\n for item in output_arrays\n ]\n )\n gen_weights_variance_sum = sum(\n [\n np.square(item[GENERALIZED_S_WEIGHT_PLUS_COLUMN])\n if GENERALIZED_S_WEIGHT_PLUS_COLUMN in item.keys()\n else np.square(item[WEIGHT_S_PLUS_COLUMN])\n for item in output_arrays\n ]\n )\n\n prior_probabilities = nr_of_deposits / nr_of_pixels\n prior_odds = np.log(prior_probabilities / (1 - prior_probabilities))\n posterior_probabilities = np.exp(gen_weights_sum + prior_odds) / (1 + np.exp(gen_weights_sum + prior_odds))\n\n posterior_probabilities_squared = np.square(posterior_probabilities)\n posterior_probabilities_std = np.sqrt(\n (1 / nr_of_deposits + gen_weights_variance_sum) * posterior_probabilities_squared\n )\n\n confidence_array = posterior_probabilities / posterior_probabilities_std\n return posterior_probabilities, posterior_probabilities_std, confidence_array\n
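A sketch chaining the weight and response calculations (file paths are hypothetical; for the unique weights type the W+ and S_W+ arrays are picked up automatically, as described above):

```python
import geopandas as gpd
import rasterio

from eis_toolkit.prediction.weights_of_evidence import (
    weights_of_evidence_calculate_responses,
    weights_of_evidence_calculate_weights,
)

with rasterio.open("data/evidence.tif") as evidential_raster:  # hypothetical path
    deposits = gpd.read_file("data/deposits.gpkg")             # hypothetical path
    _, arrays, meta, nr_of_deposits, nr_of_pixels = weights_of_evidence_calculate_weights(
        evidential_raster, deposits, weights_type="ascending"
    )

posterior, posterior_std, confidence = weights_of_evidence_calculate_responses(
    [arrays], nr_of_deposits, nr_of_pixels
)
```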
"},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_weights","title":"weights_of_evidence_calculate_weights(evidential_raster, deposits, raster_nodata=None, weights_type='unique', studentized_contrast_threshold=1, arrays_to_generate=None)
","text":"Calculate weights of spatial associations.
Parameters:
Name Type Description Default evidential_raster
DatasetReader
The evidential raster.
required deposits
GeoDataFrame
Vector data representing the mineral deposits or occurrences point data.
required raster_nodata
Optional[Number]
Specify the nodata value of the raster manually. Optional parameter, defaults to None (nodata from raster metadata is used).
None
weights_type
Literal[unique, categorical, ascending, descending]
Accepted values are 'unique', 'categorical', 'ascending' and 'descending'. Unique weights do not create generalized classes and do not use a studentized contrast threshold value, while categorical, cumulative ascending and cumulative descending weights do. Categorical weights are calculated so that all classes with studentized contrast below the defined threshold are grouped into one generalized class. Cumulative ascending and descending weights find the class with max contrast and group classes above/below into generalized classes. Generalized weights are also calculated for generalized classes.
'unique'
studentized_contrast_threshold
Number
Studentized contrast threshold value used with 'categorical', 'ascending' and 'descending' weight types. Used either as the reclassification threshold directly (categorical) or to check that the class with max contrast has a studentized contrast value of at least the defined value (cumulative). Defaults to 1.
1
arrays_to_generate
Optional[Sequence[str]]
Arrays to generate from the computed weight metrics. All column names in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"] for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"] for the cumulative weight types.
None
Returns:
Type Description DataFrame
Dataframe with weights of spatial association between the input data.
dict
Dictionary of arrays for specified metrics.
dict
Raster metadata.
int
Number of deposit pixels.
int
Number of all evidence pixels.
Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_weights(\n evidential_raster: rasterio.io.DatasetReader,\n deposits: gpd.GeoDataFrame,\n raster_nodata: Optional[Number] = None,\n weights_type: Literal[\"unique\", \"categorical\", \"ascending\", \"descending\"] = \"unique\",\n studentized_contrast_threshold: Number = 1,\n arrays_to_generate: Optional[Sequence[str]] = None,\n) -> Tuple[pd.DataFrame, dict, dict, int, int]:\n \"\"\"\n Calculate weights of spatial associations.\n\n Args:\n evidential_raster: The evidential raster.\n deposits: Vector data representing the mineral deposits or occurrences point data.\n raster_nodata: Specify the nodata value of the raster manually. Optional parameter, defaults to None\n (nodata from raster metadata is used).\n weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.\n Unique weights do not create generalized classes and do not use a studentized contrast threshold value,\n while categorical, cumulative ascending and cumulative descending weights do. Categorical weights are calculated so\n that all classes with studentized contrast below the defined threshold are grouped into one generalized\n class. Cumulative ascending and descending weights find the class with max contrast and group classes\n above/below into generalized classes. Generalized weights are also calculated for generalized classes.\n studentized_contrast_threshold: Studentized contrast threshold value used with 'categorical', 'ascending' and\n 'descending' weight types. Used either as the reclassification threshold directly (categorical) or to check\n that the class with max contrast has a studentized contrast value of at least the defined value (cumulative).\n Defaults to 1.\n arrays_to_generate: Arrays to generate from the computed weight metrics. All column names\n in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"]\n for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"]\n for the cumulative weight types.\n\n Returns:\n Dataframe with weights of spatial association between the input data.\n Dictionary of arrays for specified metrics.\n Raster metadata.\n Number of deposit pixels.\n Number of all evidence pixels.\n \"\"\"\n\n if arrays_to_generate is None:\n if weights_type == \"unique\":\n metrics_to_arrays = DEFAULT_METRICS_UNIQUE\n else:\n metrics_to_arrays = DEFAULT_METRICS_CUMULATIVE\n else:\n for col_name in arrays_to_generate:\n if col_name not in VALID_DF_COLUMNS:\n raise exceptions.InvalidColumnException(\n f\"Arrays to generate contains invalid metric / column name: {col_name}.\"\n )\n metrics_to_arrays = arrays_to_generate.copy()\n\n # 1. Preprocess data\n evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)\n raster_meta = evidential_raster.meta\n\n # Rasterize deposits\n deposit_array, _ = rasterize_vector(\n geodataframe=deposits, default_value=1.0, base_raster_profile=raster_meta, fill_value=0.0\n )\n\n # Mask NaN out of the array\n nodata_mask = np.isnan(evidence_array)\n masked_evidence_array = evidence_array[~nodata_mask]\n masked_deposit_array = deposit_array[~nodata_mask]\n\n # 2. WofE calculations\n if weights_type == \"unique\" or weights_type == \"categorical\":\n wofe_weights = _unique_weights(masked_deposit_array, masked_evidence_array)\n elif weights_type == \"ascending\":\n wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=True)\n elif weights_type == \"descending\":\n wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=False)\n else:\n raise exceptions.InvalidParameterValueException(\n \"Expected weights_type to be one of unique, categorical, ascending or descending.\"\n )\n\n # 3. Create DataFrame based on calculated metrics\n df_entries = []\n for cls, metrics in wofe_weights.items():\n metrics = [round(metric, 4) if isinstance(metric, np.floating) else metric for metric in metrics]\n A, _, C, _, w_plus, s_w_plus, w_minus, s_w_minus, contrast, s_contrast, studentized_contrast = metrics\n df_entries.append(\n {\n CLASS_COLUMN: cls,\n PIXEL_COUNT_COLUMN: A + C,\n DEPOSIT_COUNT_COLUMN: A,\n WEIGHT_PLUS_COLUMN: w_plus,\n WEIGHT_S_PLUS_COLUMN: s_w_plus,\n WEIGHT_MINUS_COLUMN: w_minus,\n WEIGHT_S_MINUS_COLUMN: s_w_minus,\n CONTRAST_COLUMN: contrast,\n S_CONTRAST_COLUMN: s_contrast,\n STUDENTIZED_CONTRAST_COLUMN: studentized_contrast,\n }\n )\n weights_df = pd.DataFrame(df_entries)\n\n # 4. If we use cumulative weights type, calculate generalized classes and weights\n if weights_type == \"categorical\":\n weights_df = _generalized_classes_categorical(weights_df, studentized_contrast_threshold)\n weights_df = _generalized_weights_categorical(weights_df, masked_deposit_array)\n elif weights_type == \"ascending\" or weights_type == \"descending\":\n weights_df = _generalized_classes_cumulative(weights_df, studentized_contrast_threshold)\n weights_df = _generalized_weights_cumulative(weights_df, masked_deposit_array)\n\n # 5. Generate arrays for desired metrics\n arrays_dict = _generate_arrays_from_metrics(evidence_array, weights_df, metrics_to_arrays)\n\n # Return nr. of deposit pixels and nr. of all evidence pixels to be used in calculating responses\n nr_of_deposits = int(np.sum(masked_deposit_array == 1))\n nr_of_pixels = int(np.size(masked_evidence_array))\n\n return weights_df, arrays_dict, raster_meta, nr_of_deposits, nr_of_pixels\n
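A sketch of a cumulative-weights run (input files are hypothetical; the printed column names follow the defaults listed above):

```python
import geopandas as gpd
import rasterio

from eis_toolkit.prediction.weights_of_evidence import weights_of_evidence_calculate_weights

with rasterio.open("data/evidence.tif") as raster:  # hypothetical path
    deposits = gpd.read_file("data/deposits.gpkg")  # hypothetical path
    weights_df, arrays_dict, raster_meta, nr_of_deposits, nr_of_pixels = weights_of_evidence_calculate_weights(
        raster,
        deposits,
        weights_type="descending",
        studentized_contrast_threshold=2,
    )

print(weights_df[["Class", "W+", "S_W+"]])
print(sorted(arrays_dict.keys()))  # metrics for which arrays were generated
```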
"},{"location":"raster_processing/check_raster_grids/","title":"Check raster grids","text":""},{"location":"raster_processing/check_raster_grids/#eis_toolkit.raster_processing.check_raster_grids.check_raster_grids","title":"check_raster_grids(rasters, same_extent=False)
","text":"Check the set of input rasters for matching gridding and optionally matching bounds.
Parameters:
Name Type Description Default rasters
List[DatasetReader]
List of rasters to test for matching gridding.
required same_extent
bool
Optional boolean argument that determines if rasters are tested for matching bounds. Defaults to False.
False
Returns:
Type Description bool
True if gridding and optionally bounds match, False if not.
Source code in eis_toolkit/raster_processing/check_raster_grids.py
def check_raster_grids( # type: ignore[no-any-unimported]\n rasters: List[rasterio.io.DatasetReader], same_extent: bool = False\n) -> bool:\n \"\"\"\n Check the set of input rasters for matching gridding and optionally matching bounds.\n\n Args:\n rasters: List of rasters to test for matching gridding.\n same_extent: Optional boolean argument that determines if rasters are tested for matching bounds.\n Defaults to False.\n\n Returns:\n True if gridding and optionally bounds match, False if not.\n \"\"\"\n check = _check_raster_grids(rasters=rasters, same_extent=same_extent)\n return check\n
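A minimal usage sketch (raster paths are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.raster_processing.check_raster_grids import check_raster_grids\n\nwith rasterio.open(\"raster_a.tif\") as r1, rasterio.open(\"raster_b.tif\") as r2:  # hypothetical rasters\n if not check_raster_grids([r1, r2], same_extent=True):\n raise ValueError(\"Input rasters must share the same grid and extent.\")\n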
"},{"location":"raster_processing/clipping/","title":"Clipping","text":""},{"location":"raster_processing/clipping/#eis_toolkit.raster_processing.clipping.clip_raster","title":"clip_raster(raster, geodataframe)
","text":"Clips a raster with polygon geometries.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be clipped.
required geodataframe
GeoDataFrame
A geodataframe containing the geometries to do the clipping with. Should contain only polygon features.
required Returns:
Type Description ndarray
The clipped raster data.
dict
The updated metadata.
Raises:
Type Description NonMatchingCrsException
The raster and geodataframe are not in the same CRS.
NotApplicableGeometryTypeException
The input geometries contain non-polygon features.
Source code in eis_toolkit/raster_processing/clipping.py
@beartype\ndef clip_raster(raster: rasterio.io.DatasetReader, geodataframe: geopandas.GeoDataFrame) -> Tuple[np.ndarray, dict]:\n \"\"\"Clips a raster with polygon geometries.\n\n Args:\n raster: The raster to be clipped.\n geodataframe: A geodataframe containing the geometries to do the clipping with.\n Should contain only polygon features.\n\n Returns:\n The clipped raster data.\n The updated metadata.\n\n Raises:\n NonMatchingCrsException: The raster and geodataframe are not in the same CRS.\n NotApplicableGeometryTypeException: The input geometries contain non-polygon features.\n \"\"\"\n geometries = geodataframe[\"geometry\"]\n\n if not check_matching_crs(\n objects=[raster, geometries],\n ):\n raise NonMatchingCrsException(\"The raster and geodataframe are not in the same CRS.\")\n\n if not check_geometry_types(\n geometries=geometries,\n allowed_types=[\"Polygon\", \"MultiPolygon\"],\n ):\n raise NotApplicableGeometryTypeException(\"The input geometries contain non-polygon features.\")\n\n out_image, out_meta = _clip_raster(\n raster=raster,\n geometries=geometries,\n )\n\n return out_image, out_meta\n
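A minimal usage sketch (file names are hypothetical; the import path follows the source path shown above). The returned metadata can be passed directly to rasterio when writing the result:
import rasterio\nimport geopandas\nfrom eis_toolkit.raster_processing.clipping import clip_raster\n\npolygons = geopandas.read_file(\"area_of_interest.gpkg\")  # hypothetical polygon data\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical raster\n out_image, out_meta = clip_raster(raster, polygons)\n\n# Write the clipped raster using the updated metadata\nwith rasterio.open(\"evidence_clipped.tif\", \"w\", **out_meta) as dst:\n dst.write(out_image)\n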
"},{"location":"raster_processing/create_constant_raster/","title":"Create constant raster","text":""},{"location":"raster_processing/create_constant_raster/#eis_toolkit.raster_processing.create_constant_raster.create_constant_raster","title":"create_constant_raster(constant_value, template_raster=None, coord_west=None, coord_north=None, coord_east=None, coord_south=None, target_epsg=None, target_pixel_size=None, raster_width=None, raster_height=None, nodata_value=None)
","text":"Create a constant raster based on a user-defined value.
Provides three methods for raster creation: 1. Set extent and coordinate system based on a template raster. 2. Set extent from origin, based on the western and northern coordinates and the pixel size. 3. Set extent from bounds, based on western, northern, eastern and southern points.
Always provide values for height and width for the last two options, which correspond to the desired number of pixels for rows and columns.
Parameters:
Name Type Description Default constant_value
Number
The constant value to use in the raster.
required template_raster
Optional[DatasetReader]
An optional raster to use as a template for the output.
None
coord_west
Optional[Number]
The western coordinate of the output raster in [m].
None
coord_east
Optional[Number]
The eastern coordinate of the output raster in [m].
None
coord_south
Optional[Number]
The southern coordinate of the output raster in [m].
None
coord_north
Optional[Number]
The northern coordinate of the output raster in [m].
None
target_epsg
Optional[int]
The EPSG code for the output raster.
None
target_pixel_size
Optional[int]
The pixel size of the output raster.
None
raster_width
Optional[int]
The width of the output raster.
None
raster_height
Optional[int]
The height of the output raster.
None
nodata_value
Optional[Number]
The nodata value of the output raster.
None
Returns:
Type Description Tuple[ndarray, dict]
A tuple containing the output raster as a NumPy array and updated metadata.
Raises:
Type Description InvalidParameterValueException
An invalid input parameter was provided.
Source code in eis_toolkit/raster_processing/create_constant_raster.py
@beartype\ndef create_constant_raster( # type: ignore[no-any-unimported]\n constant_value: Number,\n template_raster: Optional[rasterio.io.DatasetReader] = None,\n coord_west: Optional[Number] = None,\n coord_north: Optional[Number] = None,\n coord_east: Optional[Number] = None,\n coord_south: Optional[Number] = None,\n target_epsg: Optional[int] = None,\n target_pixel_size: Optional[int] = None,\n raster_width: Optional[int] = None,\n raster_height: Optional[int] = None,\n nodata_value: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Create a constant raster based on a user-defined value.\n\n Provides three methods for raster creation:\n 1. Set extent and coordinate system based on a template raster.\n 2. Set extent from origin, based on the western and northern coordinates and the pixel size.\n 3. Set extent from bounds, based on western, northern, eastern and southern points.\n\n Always provide values for height and width for the last two options, which correspond to\n the desired number of pixels for rows and columns.\n\n Args:\n constant_value: The constant value to use in the raster.\n template_raster: An optional raster to use as a template for the output.\n coord_west: The western coordinate of the output raster in [m].\n coord_east: The eastern coordinate of the output raster in [m].\n coord_south: The southern coordinate of the output raster in [m].\n coord_north: The northern coordinate of the output raster in [m].\n target_epsg: The EPSG code for the output raster.\n target_pixel_size: The pixel size of the output raster.\n raster_width: The width of the output raster.\n raster_height: The height of the output raster.\n nodata_value: The nodata value of the output raster.\n\n Returns:\n A tuple containing the output raster as a NumPy array and updated metadata.\n\n Raises:\n InvalidParameterValueException: An invalid input parameter was provided.\n \"\"\"\n\n if template_raster is not None:\n out_array, out_meta = _create_constant_raster_from_template(constant_value, template_raster, nodata_value)\n\n elif all(coords is not None for coords in [coord_west, coord_east, coord_south, coord_north]):\n if raster_height <= 0 or raster_width <= 0:\n raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n if not check_minmax_position((coord_west, coord_east)) or not check_minmax_position((coord_south, coord_north)):\n raise InvalidParameterValueException(\"Invalid coordinate values provided.\")\n\n out_array, out_meta = _create_constant_raster_from_bounds(\n constant_value,\n coord_west,\n coord_north,\n coord_east,\n coord_south,\n target_epsg,\n raster_width,\n raster_height,\n nodata_value,\n )\n\n elif all(coords is not None for coords in [coord_west, coord_north]) and all(\n coords is None for coords in [coord_east, coord_south]\n ):\n if raster_height <= 0 or raster_width <= 0:\n raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n if target_pixel_size <= 0:\n raise InvalidParameterValueException(\"Invalid pixel size.\")\n\n out_array, out_meta = _create_constant_raster_from_origin(\n constant_value,\n coord_west,\n coord_north,\n target_epsg,\n target_pixel_size,\n raster_width,\n raster_height,\n nodata_value,\n )\n\n else:\n raise InvalidParameterValueException(\"Suitable parameter values were not provided for any of the 3 methods.\")\n\n constant_value = cast_scalar_to_int(constant_value)\n nodata_value = cast_scalar_to_int(out_meta[\"nodata\"])\n\n if isinstance(constant_value, int) and isinstance(nodata_value, int):\n 
target_dtype = np.result_type(get_min_int_type(constant_value), get_min_int_type(nodata_value))\n out_array = out_array.astype(target_dtype)\n out_meta[\"dtype\"] = out_array.dtype\n elif isinstance(constant_value, int) and isinstance(nodata_value, float):\n out_array = out_array.astype(get_min_int_type(constant_value))\n out_meta[\"dtype\"] = np.float64.__name__\n elif isinstance(constant_value, float):\n out_array = out_array.astype(np.float64)\n out_meta[\"dtype\"] = out_array.dtype\n\n return out_array, out_meta\n
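A minimal usage sketch of method 2, extent from origin (the coordinate values and EPSG code below are hypothetical):
from eis_toolkit.raster_processing.create_constant_raster import create_constant_raster\n\nout_array, out_meta = create_constant_raster(\n constant_value=1,\n coord_west=384744.0,  # hypothetical origin coordinates\n coord_north=6671384.0,\n target_epsg=3067,\n target_pixel_size=25,\n raster_width=100,\n raster_height=100,\n nodata_value=-9999,\n)\nprint(out_array.shape, out_meta[\"dtype\"])\n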
"},{"location":"raster_processing/extract_values_from_raster/","title":"Extract values from raster","text":""},{"location":"raster_processing/extract_values_from_raster/#eis_toolkit.raster_processing.extract_values_from_raster.extract_values_from_raster","title":"extract_values_from_raster(raster_list, geodataframe, raster_column_names=None)
","text":"Extract raster values using point data to a DataFrame.
If custom column names are not given, column names are file_name for singleband files and file_name_bandnumber for multiband files. If custom column names are given, there should be column names for each raster provided in the raster list.
Parameters:
Name Type Description Default raster_list
Sequence[DatasetReader]
List of rasters to extract values from.
required geodataframe
GeoDataFrame
Object to extract values with.
required raster_column_names
Optional[Sequence[str]]
List of optional column names for bands.
None
Returns:
Type Description DataFrame
Dataframe with x & y coordinates and the values from the raster file(s) as columns.
Raises:
Type Description NonMatchingParameterLengthsException
raster_list and raster_column_names have different lengths.
Source code in eis_toolkit/raster_processing/extract_values_from_raster.py
@beartype\ndef extract_values_from_raster(\n raster_list: Sequence[rasterio.io.DatasetReader],\n geodataframe: gpd.GeoDataFrame,\n raster_column_names: Optional[Sequence[str]] = None,\n) -> pd.DataFrame:\n \"\"\"Extract raster values using point data to a DataFrame.\n\n If custom column names are not given, column names are file_name for singleband files\n and file_name_bandnumber for multiband files. If custom column names are given, there\n should be column names for each raster provided in the raster list.\n\n Args:\n raster_list: List of rasters to extract values from.\n geodataframe: Object to extract values with.\n raster_column_names: List of optional column names for bands.\n\n Returns:\n Dataframe with x & y coordinates and the values from the raster file(s) as columns.\n\n Raises:\n NonMatchingParameterLengthsException: raster_list and raster_column_names have different lengths.\n \"\"\"\n if raster_column_names == []:\n raster_column_names = None\n\n if raster_column_names is not None and len(raster_list) != len(raster_column_names):\n raise NonMatchingParameterLengthsException(\"Raster list and raster column names have different lengths.\")\n\n data_frame = _extract_values_from_raster(\n raster_list=raster_list, geodataframe=geodataframe, raster_column_names=raster_column_names\n )\n\n return data_frame\n
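A minimal usage sketch (file names are hypothetical; the import path follows the source path shown above):
import rasterio\nimport geopandas as gpd\nfrom eis_toolkit.raster_processing.extract_values_from_raster import extract_values_from_raster\n\npoints = gpd.read_file(\"samples.gpkg\")  # hypothetical point data\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical single-band raster\n df = extract_values_from_raster([raster], points, raster_column_names=[\"evidence\"])\nprint(df.head())\n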
"},{"location":"raster_processing/reprojecting/","title":"Reprojecting","text":""},{"location":"raster_processing/reprojecting/#eis_toolkit.raster_processing.reprojecting.reproject_raster","title":"reproject_raster(raster, target_crs, resampling_method=warp.Resampling.nearest)
","text":"Reprojects raster to match given coordinate reference system (EPSG).
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be reprojected.
required target_crs
int
Target CRS as EPSG code.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.
nearest
Returns:
Type Description ndarray
The reprojected raster data.
dict
The updated metadata.
Raises:
Type Description MatchingCrsException
Raster is already in the target CRS.
Source code in eis_toolkit/raster_processing/reprojecting.py
@beartype\ndef reproject_raster(\n raster: rasterio.io.DatasetReader, target_crs: int, resampling_method: warp.Resampling = warp.Resampling.nearest\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Reprojects raster to match given coordinate reference system (EPSG).\n\n Args:\n raster: The raster to be reprojected.\n target_crs: Target CRS as EPSG code.\n resampling_method: Resampling method. Most suitable method depends on the dataset and context.\n Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.\n\n Returns:\n The reprojected raster data.\n The updated metadata.\n\n Raises:\n MatchingCrsException: Raster is already in the target CRS.\n \"\"\"\n if target_crs == int(raster.crs.to_string()[5:]):\n raise MatchingCrsException(\"Raster is already in the target CRS.\")\n\n out_image, out_meta = _reproject_raster(raster, target_crs, resampling_method)\n\n return out_image, out_meta\n
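A minimal usage sketch (the raster path and target EPSG code are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom rasterio import warp\nfrom eis_toolkit.raster_processing.reprojecting import reproject_raster\n\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical raster not yet in EPSG:3067\n out_image, out_meta = reproject_raster(raster, target_crs=3067, resampling_method=warp.Resampling.bilinear)\n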
"},{"location":"raster_processing/resampling/","title":"Resampling","text":""},{"location":"raster_processing/resampling/#eis_toolkit.raster_processing.resampling.resample","title":"resample(raster, resolution, resampling_method=Resampling.bilinear)
","text":"Resamples raster according to given resolution.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be resampled.
required resolution
Number
Target resolution i.e. cell size of the output raster.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to bilinear.
bilinear
Returns:
Type Description ndarray
The resampled raster data.
dict
The updated metadata.
Raises:
Type Description NumericValueSignException
Resolution is not a positive value.
Source code in eis_toolkit/raster_processing/resampling.py
@beartype\ndef resample(\n raster: rasterio.io.DatasetReader,\n resolution: Number,\n resampling_method: Resampling = Resampling.bilinear,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Resamples raster according to given resolution.\n\n Args:\n raster: The raster to be resampled.\n resolution: Target resolution i.e. cell size of the output raster.\n resampling_method: Resampling method. Most suitable\n method depends on the dataset and context. Nearest, bilinear and cubic are some\n common choices. This parameter defaults to bilinear.\n\n Returns:\n The resampled raster data.\n The updated metadata.\n\n Raises:\n NumericValueSignException: Resolution is not a positive value.\n \"\"\"\n if resolution <= 0:\n raise exceptions.NumericValueSignException(f\"Expected a positive value for resolution: {resolution}.\")\n\n out_image, out_meta = _resample(raster, resolution, resampling_method)\n return out_image, out_meta\n
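A minimal usage sketch (the raster path and the 50 m target cell size are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom rasterio.enums import Resampling\nfrom eis_toolkit.raster_processing.resampling import resample\n\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical raster\n out_image, out_meta = resample(raster, resolution=50, resampling_method=Resampling.bilinear)\n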
"},{"location":"raster_processing/snapping/","title":"Snapping","text":""},{"location":"raster_processing/snapping/#eis_toolkit.raster_processing.snapping.snap_with_raster","title":"snap_with_raster(raster, snap_raster)
","text":"Snaps/aligns raster to given snap raster.
Raster is snapped from its left-bottom corner to nearest snap raster grid corner in left-bottom direction. If rasters are aligned, simply returns input raster data and metadata.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be snapped.
required snap_raster
DatasetReader
The snap raster i.e. reference grid raster.
required Returns:
Type Description ndarray
The snapped raster data.
dict
The updated metadata.
Raises:
Type Description NonMatchingCrsException
Raster and snap raster are not in the same CRS.
MatchingRasterGridException
Raster grids are already aligned.
Source code in eis_toolkit/raster_processing/snapping.py
@beartype\ndef snap_with_raster(raster: rasterio.DatasetReader, snap_raster: rasterio.DatasetReader) -> Tuple[np.ndarray, dict]:\n \"\"\"Snaps/aligns raster to given snap raster.\n\n Raster is snapped from its left-bottom corner to nearest snap raster grid corner in left-bottom direction.\n If rasters are aligned, simply returns input raster data and metadata.\n\n Args:\n raster: The raster to be snapped.\n snap_raster: The snap raster i.e. reference grid raster.\n\n Returns:\n The snapped raster data.\n The updated metadata.\n\n Raises:\n NonMatchingCrsException: Raster and snap raster are not in the same CRS.\n MatchingRasterGridException: Raster grids are already aligned.\n \"\"\"\n\n if not check_matching_crs(\n objects=[raster, snap_raster],\n ):\n raise NonMatchingCrsException(\"Raster and snap raster have different CRS.\")\n\n if snap_raster.bounds.bottom == raster.bounds.bottom and snap_raster.bounds.left == raster.bounds.left:\n raise MatchingRasterGridException(\"Raster grids are already aligned.\")\n\n out_image, out_meta = _snap(raster, snap_raster)\n return out_image, out_meta\n
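A minimal usage sketch (both raster paths are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.raster_processing.snapping import snap_with_raster\n\nwith rasterio.open(\"evidence.tif\") as raster, rasterio.open(\"base_grid.tif\") as snap_raster:  # hypothetical rasters\n out_image, out_meta = snap_with_raster(raster, snap_raster)\n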
"},{"location":"raster_processing/unifying/","title":"Unifying","text":""},{"location":"raster_processing/unifying/#eis_toolkit.raster_processing.unifying.unify_raster_grids","title":"unify_raster_grids(base_raster, rasters_to_unify, resampling_method=Resampling.nearest, same_extent=False)
","text":"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.
Parameters:
Name Type Description Default base_raster
DatasetReader
The base raster to determine target raster grid properties.
required rasters_to_unify
Sequence[DatasetReader]
Rasters to be unified with the base raster.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.
nearest
same_extent
bool
Whether the unified rasters will be forced to have the same extent/bounds as the base raster. Expands smaller rasters with nodata cells. Defaults to False.
False
Returns:
Type Description List[Tuple[ndarray, dict]]
List of unified rasters' data and metadata. First element is the base raster.
Raises:
Type Description InvalidParameterValueException
Rasters to unify is empty.
Source code in eis_toolkit/raster_processing/unifying.py
@beartype\ndef unify_raster_grids(\n base_raster: rasterio.io.DatasetReader,\n rasters_to_unify: Sequence[rasterio.io.DatasetReader],\n resampling_method: Resampling = Resampling.nearest,\n same_extent: bool = False,\n) -> List[Tuple[np.ndarray, dict]]:\n \"\"\"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.\n\n Args:\n base_raster: The base raster to determine target raster grid properties.\n rasters_to_unify: Rasters to be unified with the base raster.\n resampling_method: Resampling method. Most suitable\n method depends on the dataset and context. Nearest, bilinear and cubic are some\n common choices. This parameter defaults to nearest.\n same_extent: Whether the unified rasters will be forced to have the same extent/bounds\n as the base raster. Expands smaller rasters with nodata cells. Defaults to False.\n\n Returns:\n List of unified rasters' data and metadata. First element is the base raster.\n\n Raises:\n InvalidParameterValueException: Rasters to unify is empty.\n \"\"\"\n if len(rasters_to_unify) == 0:\n raise InvalidParameterValueException(\"Rasters to unify is empty.\")\n\n out_rasters = _unify_raster_grids(base_raster, rasters_to_unify, resampling_method, same_extent)\n return out_rasters\n
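A minimal usage sketch (all raster paths are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.raster_processing.unifying import unify_raster_grids\n\nwith rasterio.open(\"base.tif\") as base, rasterio.open(\"evidence_1.tif\") as r1, rasterio.open(\"evidence_2.tif\") as r2:\n out_rasters = unify_raster_grids(base, [r1, r2], same_extent=True)\nbase_image, base_meta = out_rasters[0]  # first element is the base raster\n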
"},{"location":"raster_processing/windowing/","title":"Windowing","text":""},{"location":"raster_processing/windowing/#eis_toolkit.raster_processing.windowing.extract_window","title":"extract_window(raster, center_coords, height, width)
","text":"Extract window from raster.
Center coordinate must be inside the raster but window can extend outside the raster in which case padding with raster nodata value is used.
Parameters:
Name Type Description Default raster
DatasetReader
Source raster.
required center_coords
Tuple[Number, Number]
Center coordinates for window in form (x, y). The coordinates should be in the raster's CRS.
required height
int
Window height in pixels.
required width
int
Window width in pixels.
required
Returns:
Type Description ndarray
The extracted raster window.
dict
The updated metadata.
Raises:
Type Description InvalidParameterValueException
Window size is too small.
CoordinatesOutOfBoundsException
Window center coordinates are out of raster bounds.
Source code in eis_toolkit/raster_processing/windowing.py
@beartype\ndef extract_window(\n raster: rasterio.io.DatasetReader,\n center_coords: Tuple[Number, Number],\n height: int,\n width: int,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Extract window from raster.\n\n Center coordinate must be inside the raster but window can extend outside the raster in which case padding with\n raster nodata value is used.\n\n Args:\n raster: Source raster.\n center_coords: Center coordinates for window in form (x, y). The coordinates should be in the raster's CRS.\n height: Window height in pixels.\n width: Window width in pixels.\n\n Returns:\n The extracted raster window.\n The updated metadata.\n\n Raises:\n InvalidParameterValueException: Window size is too small.\n CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.\n \"\"\"\n\n if height < 1 or width < 1:\n raise InvalidParameterValueException(f\"Window size is too small: {height}, {width}.\")\n\n center_x = center_coords[0]\n center_y = center_coords[1]\n\n if (\n center_x < raster.bounds.left\n or center_x > raster.bounds.right\n or center_y < raster.bounds.bottom\n or center_y > raster.bounds.top\n ):\n raise CoordinatesOutOfBoundsException(\"Window center coordinates are out of raster bounds.\")\n\n out_image, out_meta = _extract_window(raster, center_coords, height, width)\n\n return out_image, out_meta\n
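A minimal usage sketch (the raster path and the center coordinates are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.raster_processing.windowing import extract_window\n\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical raster\n out_image, out_meta = extract_window(raster, center_coords=(384744.0, 6671384.0), height=5, width=5)\n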
"},{"location":"training_data_tools/class_balancing/","title":"Class balancing","text":""},{"location":"training_data_tools/class_balancing/#eis_toolkit.training_data_tools.class_balancing.balance_SMOTETomek","title":"balance_SMOTETomek(X, y, sampling_strategy='auto', random_state=None)
","text":"Balances the classes of input dataset using SMOTETomek resampling method.
Parameters:
Name Type Description Default X
Union[DataFrame, ndarray]
The feature matrix (input data as a DataFrame).
required y
Union[Series, ndarray]
The target labels corresponding to the feature matrix.
required sampling_strategy
Union[float, str, dict]
Parameter controlling how to perform the resampling. If float, specifies the ratio of samples in minority class to samples of majority class, if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"), if dict, the keys should be targeted classes and values the desired number of samples for the class. Defaults to \"auto\", which will resample all classes except the majority class.
'auto'
random_state
Optional[int]
Parameter controlling randomization of the algorithm. Can be given a seed (number). Defaults to None, which randomizes the seed.
None
Returns:
Type Description tuple[Union[DataFrame, ndarray], Union[Series, ndarray]]
Resampled feature matrix and target labels.
Raises:
Type Description NonMatchingParameterLengthsException
If X and y have different lengths.
Source code in eis_toolkit/training_data_tools/class_balancing.py
@beartype\ndef balance_SMOTETomek(\n X: Union[pd.DataFrame, np.ndarray],\n y: Union[pd.Series, np.ndarray],\n sampling_strategy: Union[float, str, dict] = \"auto\",\n random_state: Optional[int] = None,\n) -> tuple[Union[pd.DataFrame, np.ndarray], Union[pd.Series, np.ndarray]]:\n \"\"\"Balances the classes of the input dataset using the SMOTETomek resampling method.\n\n Args:\n X: The feature matrix (input data as a DataFrame).\n y: The target labels corresponding to the feature matrix.\n sampling_strategy: Parameter controlling how to perform the resampling.\n If float, specifies the ratio of samples in minority class to samples of majority class,\n if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"),\n if dict, the keys should be targeted classes and values the desired number of samples for the class.\n Defaults to \"auto\", which will resample all classes except the majority class.\n random_state: Parameter controlling randomization of the algorithm. Can be given a seed (number).\n Defaults to None, which randomizes the seed.\n\n Returns:\n Resampled feature matrix and target labels.\n\n Raises:\n NonMatchingParameterLengthsException: If X and y have different lengths.\n \"\"\"\n\n if len(X) != len(y):\n raise exceptions.NonMatchingParameterLengthsException(\n \"Feature matrix X and target labels y must have the same length.\"\n )\n\n X_res, y_res = SMOTETomek(sampling_strategy=sampling_strategy, random_state=random_state).fit_resample(X, y)\n return X_res, y_res\n
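A minimal usage sketch on toy data (the array shapes and class counts are illustrative; the import path follows the source path shown above):
import numpy as np\nfrom eis_toolkit.training_data_tools.class_balancing import balance_SMOTETomek\n\nrng = np.random.default_rng(seed=42)\nX = rng.normal(size=(100, 3))  # toy feature matrix\ny = np.array([0] * 90 + [1] * 10)  # imbalanced labels: 90 vs 10\n\nX_res, y_res = balance_SMOTETomek(X, y, random_state=42)\nprint(np.bincount(y_res))  # classes are roughly balanced after resampling\n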
"},{"location":"transformations/binarize/","title":"Binarize","text":""},{"location":"transformations/binarize/#eis_toolkit.transformations.binarize.binarize","title":"binarize(raster, bands=None, thresholds=[Number], nodata=None)
","text":"Binarize data based on a given threshold.
Replaces values less than or equal to the threshold with 0. Replaces values greater than the threshold with 1.
Takes one nodata value which will be re-written after transformation.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The threshold can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
thresholds
Sequence[Number]
Threshold values for transformation.
[Number]
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
Source code in eis_toolkit/transformations/binarize.py
@beartype\ndef binarize( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n thresholds: Sequence[Number] = [Number],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Binarize data based on a given threshold.\n\n Replaces values less than or equal to the threshold with 0.\n Replaces values greater than the threshold with 1.\n\n Takes one nodata value which will be re-written after transformation.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The threshold can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n thresholds: Threshold values for transformation.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = cast_scalar_to_int(raster.nodata if nodata is None else nodata)\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection.\")\n\n if check_parameter_length(bands, thresholds) is False:\n raise NonMatchingParameterLengthsException(\"Invalid threshold length.\")\n\n expanded_args = expand_and_zip(bands, thresholds)\n thresholds = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n initial_dtype = band_array.dtype\n\n band_mask = np.isin(band_array, nodata)\n band_array = _binarize(band_array, threshold=thresholds[i])\n band_array = np.where(band_mask, nodata, band_array)\n\n if not check_dtype_for_int(nodata):\n band_array = band_array.astype(initial_dtype)\n else:\n band_array = band_array.astype(np.min_scalar_type(nodata))\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"threshold\": thresholds[i],\n \"nodata\": nodata,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
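A minimal usage sketch (the raster path and the 0.5 threshold are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.transformations.binarize import binarize\n\nwith rasterio.open(\"evidence.tif\") as raster:  # hypothetical single-band raster\n out_array, out_meta, out_settings = binarize(raster, bands=[1], thresholds=[0.5])\n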
"},{"location":"transformations/clip/","title":"Clip","text":""},{"location":"transformations/clip/#eis_toolkit.transformations.clip.clip_transform","title":"clip_transform(raster, limits, bands=None, nodata=None)
","text":"Clips data based on specified upper and lower limits.
Takes one nodata value that will be ignored in calculations. Replaces values below the lower limit and above the upper limit with the provided limit values, respectively. Works both one-sided and two-sided but raises an error if no limits are provided.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The limits can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
limits
Sequence[Tuple[Optional[Number], Optional[Number]]]
Lower and upper limits (lower, upper) as real values.
required nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/clip.py
@beartype\ndef clip_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n limits: Sequence[Tuple[Optional[Number], Optional[Number]]],\n bands: Optional[Sequence[int]] = None,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Clips data based on specified upper and lower limits.\n\n Takes one nodata value that will be ignored in calculations.\n Replaces values below the lower limit and above the upper limit with the provided limit values, respectively.\n Works both one-sided and two-sided but raises an error if no limits are provided.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The limits can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n limits: Lower and upper limits (lower, upper) as real values.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values).\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, limits) is False:\n raise NonMatchingParameterLengthsException(\"Invalid limit length.\")\n\n for item in limits:\n if item.count(None) == len(item):\n raise InvalidParameterValueException(f\"Limit values all None: {item}.\")\n\n if not check_minmax_position(item):\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n expanded_args = expand_and_zip(bands, limits)\n limits = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n initial_dtype = band_array.dtype\n\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = nodata_to_nan(band_array, nodata_value=nodata)\n\n band_array = _clip_transform(band_array, limits=limits[i])\n\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=initial_dtype)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"limit_lower\": cast_scalar_to_int(limits[i][0]),\n \"limit_upper\": cast_scalar_to_int(limits[i][1]),\n \"nodata\": cast_scalar_to_int(nodata),\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
"},{"location":"transformations/linear/","title":"Linear","text":""},{"location":"transformations/linear/#eis_toolkit.transformations.linear.min_max_scaling","title":"min_max_scaling(raster, bands=None, new_range=[(0, 1)], nodata=None)
","text":"Normalize data based on a specified new range.
Uses the provided new minimum and maximum to transform data into the new interval. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. The new_range can be set for each band individually. If a parameter contains only 1 entry, it will be applied for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
new_range
Sequence[Tuple[Number, Number]]
The new interval data will be transformed into. First value corresponds to min, second to max.
[(0, 1)]
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/linear.py
@beartype\ndef min_max_scaling( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n new_range: Sequence[Tuple[Number, Number]] = [(0, 1)],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Normalize data based on a specified new range.\n\n Uses the provided new minimum and maximum to transform data into the new interval.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n The new_range can be set for each band individually.\n If a parameter contains only 1 entry, it will be applied for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n new_range: The new interval data will be transformed into. First value corresponds to min, second to max.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values).\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, new_range) is False:\n raise NonMatchingParameterLengthsException(\"Invalid new_range length\")\n\n for item in new_range:\n if not check_minmax_position(item):\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}\")\n\n expanded_args = expand_and_zip(bands, new_range)\n new_range = [element[1] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array = _min_max_scaling(band_array.astype(np.float64), new_range=new_range[i])\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"scaled_min\": new_range[i][0],\n \"scaled_max\": new_range[i][1],\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
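A minimal usage sketch showing per-band ranges (the two-band raster path and the chosen intervals are hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.transformations.linear import min_max_scaling\n\nwith rasterio.open(\"evidence_stack.tif\") as raster:  # hypothetical two-band raster\n out_array, out_meta, out_settings = min_max_scaling(raster, bands=[1, 2], new_range=[(0, 1), (-1, 1)])\n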
"},{"location":"transformations/linear/#eis_toolkit.transformations.linear.z_score_normalization","title":"z_score_normalization(raster, bands=None, nodata=None)
","text":"Normalize data based on mean and standard deviation.
Results will have a mean = 0 and standard deviation = 1. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
Source code in eis_toolkit/transformations/linear.py
@beartype\ndef z_score_normalization( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Normalize data based on mean and standard deviation.\n\n Results will have a mean = 0 and standard deviation = 1.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection.\")\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array, mean_array, sd_array = _z_score_normalization(band_array.astype(np.float64))\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"original_mean\": truncate_decimal_places(mean_array, decimal_places=out_decimals),\n \"original_sd\": truncate_decimal_places(sd_array, decimal_places=out_decimals),\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
"},{"location":"transformations/logarithmic/","title":"Logarithmic","text":""},{"location":"transformations/logarithmic/#eis_toolkit.transformations.logarithmic.log_transform","title":"log_transform(raster, bands=None, log_transform=['log2'], nodata=None)
","text":"Perform a logarithmic transformation on the provided data.
Takes one nodata value that will be ignored in calculations. Negative values will not be considered for transformation and are replaced by the specified nodata value.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The log_transform can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
log_transform
Sequence[str]
The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.
['log2']
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/logarithmic.py
@beartype\ndef log_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n log_transform: Sequence[str] = [\"log2\"],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Perform a logarithmic transformation on the provided data.\n\n Takes one nodata value that will be ignored in calculations.\n Negative values will not be considered for transformation and are replaced by the specified nodata value.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The log_transform can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n log_transform: The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values).\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, log_transform) is False:\n raise NonMatchingParameterLengthsException(\"Invalid length for log-base values.\")\n\n for item in log_transform:\n if not (item == \"ln\" or item == \"log2\" or item == \"log10\"):\n raise InvalidParameterValueException(f\"Invalid method: {item}.\")\n\n expanded_args = expand_and_zip(bands, log_transform)\n log_transform = [element[1] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n band_array[band_array <= 0] = np.nan\n\n if log_transform[i] == \"ln\":\n band_array = _log_transform_ln(band_array.astype(np.float64))\n elif log_transform[i] == \"log2\":\n band_array = _log_transform_log2(band_array.astype(np.float64))\n elif log_transform[i] == \"log10\":\n band_array = _log_transform_log10(band_array.astype(np.float64))\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"log_transform\": log_transform[i],\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
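A minimal usage sketch showing a different log base per band (the two-band raster path is hypothetical; the import path follows the source path shown above):
import rasterio\nfrom eis_toolkit.transformations.logarithmic import log_transform\n\nwith rasterio.open(\"evidence_stack.tif\") as raster:  # hypothetical two-band raster\n out_array, out_meta, out_settings = log_transform(raster, bands=[1, 2], log_transform=[\"ln\", \"log10\"])\n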
"},{"location":"transformations/sigmoid/","title":"Sigmoid","text":""},{"location":"transformations/sigmoid/#eis_toolkit.transformations.sigmoid.sigmoid_transform","title":"sigmoid_transform(raster, bands=None, bounds=[(0, 1)], slope=[1], center=True, nodata=None)
","text":"Transform data into a sigmoid-shape based on a specified new range.
Uses the provided new minimum and maximum, shift and slope parameters to transform the data. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The bounds and slope values can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
bounds
Sequence[Tuple[Number, Number]]
Boundaries for the calculation of the sigmoid function (lower, upper).
[(0, 1)]
slope
Sequence[Number]
Value which modifies the slope of the resulting sigmoid-curve.
[1]
center
bool
Center array values around mean = 0 before sigmoid transformation.
True
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values)
Source code in eis_toolkit/transformations/sigmoid.py
@beartype\ndef sigmoid_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n bounds: Sequence[Tuple[Number, Number]] = [(0, 1)],\n slope: Sequence[Number] = [1],\n center: bool = True,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Transform data into a sigmoid-shape based on a specified new range.\n\n Uses the provided new minimum and maximum, shift and slope parameters to transform the data.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The bounds and slope values can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n bounds: Boundaries for the calculation of the sigmoid function (lower, upper).\n slope: Value which modifies the slope of the resulting sigmoid-curve.\n center: Center array values around mean = 0 before sigmoid transformation.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values)\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n for parameter_name, parameter in [(\"bounds\", bounds), (\"slope\", slope)]:\n if check_parameter_length(bands, parameter) is False:\n raise NonMatchingParameterLengthsException(f\"Invalid length for {parameter_name}.\")\n\n for item in bounds:\n if check_minmax_position(item) is False:\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n expanded_args = expand_and_zip(bands, bounds, slope)\n bounds = [element[1] for element in expanded_args]\n slope = [element[2] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array = _sigmoid_transform(band_array.astype(np.float64), bounds=bounds[i], slope=slope[i], center=center)\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"bound_lower\": truncate_decimal_places(bounds[i][0], decimal_places=out_decimals),\n \"bound_upper\": truncate_decimal_places(bounds[i][1], decimal_places=out_decimals),\n \"slope\": slope[i],\n \"center\": center,\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n 
}\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
"},{"location":"transformations/winsorize/","title":"Winsorize","text":""},{"location":"transformations/winsorize/#eis_toolkit.transformations.winsorize.winsorize","title":"winsorize(raster, percentiles, bands=None, inside=False, nodata=None)
","text":"Winsorize data based on specified percentile values.
Takes one nodata value that will be ignored in calculations. Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided. Works both one-sided and two-sided but raises an error if no percentile values are provided.
Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%]. And percentile_upper = 10 corresponds to the interval [90%, max]. I.e. percentile_lower = 0 refers to the minimum and percentile_upper = 0 to the data maximum.
Calculation of percentiles is ambiguous. Users can choose whether to use the value for replacement from inside or outside of the respective interval. Example: Given the np.array[5 10 12 15 20 24 27 30 35] and percentiles(10, 10), the calculated percentiles are (5, 35) for inside and (10, 30) for outside. This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The percentiles can be set for each band individually, but the inside parameter is the same for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
percentiles
Sequence[Tuple[Optional[Number], Optional[Number]]]
Lower and upper percentile values (lower, upper) between [0, 100].
required inside
bool
Whether to use the value for replacement from the left or right of the calculated percentile.
False
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/winsorize.py
@beartype\ndef winsorize( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n percentiles: Sequence[Tuple[Optional[Number], Optional[Number]]],\n bands: Optional[Sequence[int]] = None,\n inside: bool = False,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Winsorize data based on specified percentile values.\n\n Takes one nodata value that will be ignored in calculations.\n Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided.\n Works both one-sided and two-sided but raises error if no percentile values provided.\n\n Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%].\n And percentile_upper = 10 corresponds to the intervall [90%, max].\n I.e. percentile_lower = 0 refers to the minimum and percentile_upper = 0 to the data maximum.\n\n Calculation of percentiles is ambiguous. Users can choose whether to use the value\n for replacement from inside or outside of the respective interval. Example:\n Given the np.array[5 10 12 15 20 24 27 30 35] and percentiles(10, 10), the calculated\n percentiles are (5, 35) for inside and (10, 30) for outside.\n This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The percentiles can be set for each band individually, but inside parameter is same for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n percentiles: Lower and upper percentile values (lower, upper) between [0, 100].\n inside: Whether to use the value for replacement from the left or right of the calculated percentile.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values)\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, percentiles) is False:\n raise NonMatchingParameterLengthsException(\"Invalid length for percentiles.\")\n\n for item in percentiles:\n if item.count(None) == len(item):\n raise InvalidParameterValueException(f\"Percentile values all None: {item}.\")\n\n if None not in item and sum(item) >= 100:\n raise InvalidParameterValueException(f\"Sum >= 100: {item}.\")\n\n if item[0] is not None and not (0 < item[0] < 100):\n raise InvalidParameterValueException(f\"Invalid lower percentile value: {item}.\")\n\n if item[1] is not None and not (0 < item[1] < 100):\n raise InvalidParameterValueException(f\"Invalid upper percentile value: {item}.\")\n\n expanded_args = expand_and_zip(bands, percentiles)\n percentiles = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n inital_dtype = band_array.dtype\n\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = 
nodata_to_nan(band_array, nodata_value=nodata)\n\n band_array, calculated_lower, calculated_upper = _winsorize(\n band_array, percentiles=percentiles[i], inside=inside\n )\n\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=initial_dtype)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"percentile_lower\": cast_scalar_to_int(percentiles[i][0]),\n \"percentile_upper\": cast_scalar_to_int(percentiles[i][1]),\n \"calculated_lower\": cast_scalar_to_int(calculated_lower),\n \"calculated_upper\": cast_scalar_to_int(calculated_upper),\n \"nodata\": cast_scalar_to_int(nodata),\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
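A minimal usage sketch (the raster path, percentile choice and module path are illustrative assumptions, not part of the generated docs):
import rasterio\nfrom eis_toolkit.transformations.winsorize import winsorize # assumed module path\n\n# Hypothetical single-band raster: clip the lowest and highest 5 % of values\nwith rasterio.open(\"evidence_layer.tif\") as raster:\n out_array, out_meta, out_settings = winsorize(\n raster=raster,\n percentiles=[(5, 5)], # (lower, upper) for band 1\n inside=False, # replace with values calculated outside the interval\n )\n\n# out_settings logs the calculated replacement values per band\nprint(out_settings[\"transformation 1\"])\n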
"},{"location":"validation/calculate_auc/","title":"Calculate AUC","text":""},{"location":"validation/calculate_auc/#eis_toolkit.validation.calculate_auc.calculate_auc","title":"calculate_auc(x_values, y_values)
","text":"Calculate area under curve (AUC).
Calculates AUC for a curve. The x-axis should be either proportion of area or false positive rate; the y-axis should always be true positive rate. AUC is calculated with sklearn.metrics.auc, which uses the trapezoidal rule.
Parameters:
Name Type Description Default x_values
ndarray
Either proportion of area or false positive rate values.
required y_values
ndarray
True positive rate values.
required Returns:
Type Description float
The area under curve.
Raises:
Type Description InvalidParameterValueException
x_values or y_values are out of bounds.
Source code in eis_toolkit/validation/calculate_auc.py
@beartype\ndef calculate_auc(x_values: np.ndarray, y_values: np.ndarray) -> float:\n \"\"\"Calculate area under curve (AUC).\n\n Calculates AUC for a curve. The x-axis should be either proportion of area or false positive rate. The y-axis\n should always be true positive rate. AUC is calculated with sklearn.metrics.auc, which uses the trapezoidal rule.\n\n Args:\n x_values: Either proportion of area or false positive rate values.\n y_values: True positive rate values.\n\n Returns:\n The area under curve.\n\n Raises:\n InvalidParameterValueException: x_values or y_values are out of bounds.\n \"\"\"\n if x_values.max() > 1 or x_values.min() < 0:\n raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n if y_values.max() > 1 or y_values.min() < 0:\n raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n auc_value = _calculate_auc(x_values=x_values, y_values=y_values)\n return auc_value\n
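A minimal usage sketch with toy arrays (values invented for illustration):
import numpy as np\nfrom eis_toolkit.validation.calculate_auc import calculate_auc\n\n# Toy curve: proportion of area on the x-axis, true positive rate on the y-axis\nx_values = np.array([0.0, 0.1, 0.3, 0.6, 1.0])\ny_values = np.array([0.0, 0.4, 0.7, 0.9, 1.0])\n\nauc_value = calculate_auc(x_values=x_values, y_values=y_values)\nprint(f\"AUC: {auc_value:.3f}\")\n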
"},{"location":"validation/calculate_base_metrics/","title":"Calculate base metrics","text":""},{"location":"validation/calculate_base_metrics/#eis_toolkit.validation.calculate_base_metrics.calculate_base_metrics","title":"calculate_base_metrics(raster, deposits, band=1, negatives=None)
","text":"Calculate true positive rate, proportion of area and false positive rate values for different thresholds.
The function calculates true positive rate, proportion of area and false positive rate values for different thresholds, which are determined from the input deposit locations and the mineral prospectivity map. Note that the calculation of false positive rate is optional and is only done if negative point locations are provided.
Parameters:
Name Type Description Default raster
DatasetReader
Mineral prospectivity map or evidence layer.
required deposits
GeoDataFrame
Mineral deposit locations as points.
required band
int
Band index of the mineral prospectivity map. Defaults to 1.
1
negatives
Optional[GeoDataFrame]
Negative locations as points.
None
Returns:
Type Description DataFrame
DataFrame containing true positive rate, proportion of area, threshold values and false positive rate (optional) values.
Raises:
Type Description NonMatchingCrsException
The raster and point data are not in the same CRS.
NotApplicableGeometryTypeException
The input geometries contain non-point features.
Source code in eis_toolkit/validation/calculate_base_metrics.py
@beartype\ndef calculate_base_metrics(\n raster: rasterio.io.DatasetReader,\n deposits: geopandas.GeoDataFrame,\n band: int = 1,\n negatives: Optional[geopandas.GeoDataFrame] = None,\n) -> pd.DataFrame:\n \"\"\"Calculate true positive rate, proportion of area and false positive rate values for different thresholds.\n\n The function calculates true positive rate, proportion of area and false positive rate values for different\n thresholds, which are determined from the input deposit locations and mineral prospectivity map. Note that the\n calculation of false positive rate is optional and is only done if negative point locations are provided.\n\n Args:\n raster: Mineral prospectivity map or evidence layer.\n deposits: Mineral deposit locations as points.\n band: Band index of the mineral prospectivity map. Defaults to 1.\n negatives: Negative locations as points.\n\n Returns:\n DataFrame containing true positive rate, proportion of area, threshold values and false positive\n rate (optional) values.\n\n Raises:\n NonMatchingCrsException: The raster and point data are not in the same CRS.\n NotApplicableGeometryTypeException: The input geometries contain non-point features.\n \"\"\"\n if negatives is not None:\n geometries = pd.concat([deposits, negatives]).geometry\n else:\n geometries = deposits[\"geometry\"]\n\n if not check_matching_crs(\n objects=[raster, geometries],\n ):\n raise NonMatchingCrsException(\"The raster and deposits are not in the same CRS.\")\n\n if not check_geometry_types(\n geometries=geometries,\n allowed_types=[\"Point\"],\n ):\n raise NotApplicableGeometryTypeException(\"The input geometries contain non-point features.\")\n\n base_metrics = _calculate_base_metrics(raster=raster, deposits=deposits, band=band, negatives=negatives)\n\n return base_metrics\n
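A usage sketch (file names are hypothetical; the raster and points must share a CRS):
import rasterio\nimport geopandas as gpd\nfrom eis_toolkit.validation.calculate_base_metrics import calculate_base_metrics\n\ndeposits = gpd.read_file(\"deposits.gpkg\") # hypothetical point data\nwith rasterio.open(\"prospectivity_map.tif\") as raster: # hypothetical raster\n base_metrics = calculate_base_metrics(raster=raster, deposits=deposits, band=1)\n\nprint(base_metrics.head())\n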
"},{"location":"validation/get_pa_intersection/","title":"Get P-A plot intersection point","text":""},{"location":"validation/get_pa_intersection/#eis_toolkit.validation.get_pa_intersection.get_pa_intersection","title":"get_pa_intersection(true_positive_rate_values, proportion_of_area_values, threshold_values)
","text":"Calculate the intersection point for prediction rate and area curves in (P-A plot).
Threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate as the y-axis, and the area curve uses the inverted proportion of area as the y-axis.
Parameters:
Name Type Description Default true_positive_rate_values
ndarray
True positive rate values; values should be within range 0-1.
required proportion_of_area_values
ndarray
Proportion of area values; values should be within range 0-1.
required threshold_values
ndarray
Threshold values that were used to calculate true positive rate and proportion of area.
required Returns:
Type Description Tuple[float, float]
X and y coordinates of the intersection point.
Raises:
Type Description InvalidParameterValueException
true_positive_rate_values or proportion_of_area_values are out of bounds.
Source code in eis_toolkit/validation/get_pa_intersection.py
@beartype\ndef get_pa_intersection(\n true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray\n) -> Tuple[float, float]:\n \"\"\"Calculate the intersection point of the prediction rate and area curves in a prediction-area (P-A) plot.\n\n Threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate as the\n y-axis, and the area curve uses the inverted proportion of area as the y-axis.\n\n Args:\n true_positive_rate_values: True positive rate values, values should be within range 0-1.\n proportion_of_area_values: Proportion of area values, values should be within range 0-1.\n threshold_values: Threshold values that were used to calculate true positive rate and proportion of area.\n\n Returns:\n X and y coordinates of the intersection point.\n\n Raises:\n InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values are out of bounds.\n \"\"\"\n if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n raise InvalidParameterValueException(\"true_positive_rate_values values should be within range 0-1\")\n\n if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n raise InvalidParameterValueException(\"proportion_of_area_values values should be within range 0-1\")\n\n intersection = _get_pa_intersection(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n )\n\n return intersection.x, intersection.y\n
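A usage sketch with synthetic, monotone toy curves (all values invented):
import numpy as np\nfrom eis_toolkit.validation.get_pa_intersection import get_pa_intersection\n\nthreshold_values = np.linspace(1.0, 0.0, 10)\ntrue_positive_rate_values = np.linspace(0.0, 1.0, 10)\nproportion_of_area_values = np.linspace(0.0, 1.0, 10) ** 2\n\nx, y = get_pa_intersection(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n)\nprint(f\"Intersection at threshold {x:.2f}, rate {y:.2f}\")\n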
"},{"location":"validation/plot_correlation_matrix/","title":"Plot correlation matrix","text":""},{"location":"validation/plot_correlation_matrix/#eis_toolkit.validation.plot_correlation_matrix.plot_correlation_matrix","title":"plot_correlation_matrix(matrix, annotate=True, cmap=None, plot_title=None, **kwargs)
","text":"Create a Seaborn heatmap to visualize correlation matrix.
Parameters:
Name Type Description Default matrix
DataFrame
Correlation matrix as a DataFrame.
required annotate
bool
Whether the plot squares should display the correlation values. Defaults to True.
True
cmap
Optional[ListedColormap]
Colormap for plotting. Optional parameter. Defaults to None, in which case a default colormap is used.
None
plot_title
Optional[str]
Title of the plot. Optional parameter, defaults to None (no title).
None
**kwargs
dict
Additional parameters to pass to Seaborn and matplotlib.
{}
Returns:
Type Description Axes
Matplotlib axes object with the produced plot.
Raises:
Type Description EmptyDataFrameException
Input matrix is empty.
Source code in eis_toolkit/validation/plot_correlation_matrix.py
def plot_correlation_matrix(\n matrix: pd.DataFrame,\n annotate: bool = True,\n cmap: Optional[matplotlib.colors.ListedColormap] = None,\n plot_title: Optional[str] = None,\n **kwargs: dict\n) -> matplotlib.axes.Axes:\n \"\"\"\n Create a Seaborn heatmap to visualize a correlation matrix.\n\n Args:\n matrix: Correlation matrix as a DataFrame.\n annotate: Whether the plot squares should display the correlation values. Defaults to True.\n cmap: Colormap for plotting. Optional parameter. Defaults to None, in which\n case a default colormap is used.\n plot_title: Title of the plot. Optional parameter, defaults to None (no title).\n **kwargs: Additional parameters to pass to Seaborn and matplotlib.\n\n Returns:\n Matplotlib axes object with the produced plot.\n\n Raises:\n EmptyDataFrameException: Input matrix is empty.\n \"\"\"\n if matrix.empty:\n raise exceptions.EmptyDataFrameException(\"Input matrix DataFrame is empty.\")\n\n # Mask for the upper triangle of the heatmap\n mask = np.triu(np.ones_like(matrix, dtype=bool))\n\n if cmap is None:\n # Generate a default diverging colormap\n cmap = sns.diverging_palette(230, 20, as_cmap=True)\n\n ax = sns.heatmap(\n matrix,\n mask=mask,\n cmap=cmap,\n vmax=0.3,\n center=0,\n square=True,\n linewidths=0.5,\n annot=annotate,\n cbar_kws={\"shrink\": 0.5},\n **kwargs\n )\n if plot_title is not None:\n ax.set_title(plot_title)\n\n return ax\n
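A usage sketch with a small toy DataFrame (data invented for illustration):
import pandas as pd\nimport matplotlib.pyplot as plt\nfrom eis_toolkit.validation.plot_correlation_matrix import plot_correlation_matrix\n\ndf = pd.DataFrame({\"a\": [1, 2, 3, 4], \"b\": [2, 1, 4, 3], \"c\": [4, 3, 2, 1]})\nax = plot_correlation_matrix(df.corr(), annotate=True, plot_title=\"Correlation\")\nplt.show()\n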
"},{"location":"validation/plot_prediction_area_curves/","title":"Plot prediction-area (P-A) curves","text":""},{"location":"validation/plot_prediction_area_curves/#eis_toolkit.validation.plot_prediction_area_curves.plot_prediction_area_curves","title":"plot_prediction_area_curves(true_positive_rate_values, proportion_of_area_values, threshold_values)
","text":"Plot prediction-area (P-A) plot.
Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See e.g. Yousefi and Carranza (2015).
Parameters:
Name Type Description Default true_positive_rate_values
ndarray
True positive rate values.
required proportion_of_area_values
ndarray
Proportion of area values.
required threshold_values
ndarray
Threshold values.
required Returns:
Type Description Figure
P-A plot figure object.
Raises:
Type Description InvalidParameterValueException
true_positive_rate_values or proportion_of_area_values are out of bounds.
References Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.
Source code in eis_toolkit/validation/plot_prediction_area_curves.py
@beartype\ndef plot_prediction_area_curves(\n true_positive_rate_values: np.ndarray, proportion_of_area_values: np.ndarray, threshold_values: np.ndarray\n) -> matplotlib.figure.Figure:\n \"\"\"Plot a prediction-area (P-A) plot.\n\n Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See\n e.g. Yousefi and Carranza (2015).\n\n Args:\n true_positive_rate_values: True positive rate values.\n proportion_of_area_values: Proportion of area values.\n threshold_values: Threshold values.\n\n Returns:\n P-A plot figure object.\n\n Raises:\n InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values are out of bounds.\n\n References:\n Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral\n prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.\n \"\"\"\n if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n raise InvalidParameterValueException(\"true_positive_rate values should be within range 0-1\")\n\n if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n raise InvalidParameterValueException(\"proportion_of_area values should be within range 0-1\")\n\n fig = _plot_prediction_area_curves(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n )\n return fig\n
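A usage sketch with the same kind of synthetic curves as above (values invented):
import numpy as np\nfrom eis_toolkit.validation.plot_prediction_area_curves import plot_prediction_area_curves\n\nthreshold_values = np.linspace(1.0, 0.0, 10)\ntrue_positive_rate_values = np.linspace(0.0, 1.0, 10)\nproportion_of_area_values = np.linspace(0.0, 1.0, 10) ** 2\n\nfig = plot_prediction_area_curves(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n)\nfig.savefig(\"pa_plot.png\")\n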
"},{"location":"validation/plot_rate_curve/","title":"Plot rate curve","text":""},{"location":"validation/plot_rate_curve/#eis_toolkit.validation.plot_rate_curve.plot_rate_curve","title":"plot_rate_curve(x_values, y_values, plot_type='success_rate')
","text":"Plot success rate, prediction rate or ROC curve.
The plot type depends on the plot_type argument. The y-axis is always true positive rate, while the x-axis is either false positive rate (roc) or proportion of area (success and prediction rate), depending on the plot type.
Parameters:
Name Type Description Default x_values
ndarray
False positive rate values or proportion of area values.
required y_values
ndarray
True positive rate values.
required plot_type
str
Plot type. Can be either: \"success_rate\", \"prediction_rate\" or \"roc\".
'success_rate'
Returns:
Type Description Figure
Success rate, prediction rate or ROC plot figure object.
Raises:
Type Description InvalidParameterValueException
Invalid plot type.
InvalidParameterValueException
x_values or y_values are out of bounds.
Source code in eis_toolkit/validation/plot_rate_curve.py
@beartype\ndef plot_rate_curve(\n x_values: np.ndarray,\n y_values: np.ndarray,\n plot_type: str = \"success_rate\",\n) -> matplotlib.figure.Figure:\n \"\"\"Plot success rate, prediction rate or ROC curve.\n\n Plot type depends on plot_type argument. Y-axis is always true positive rate, while x-axis can be either false\n positive rate (roc) or proportion of area (success and prediction rate) depending on plot type.\n\n Args:\n x_values: False positive rate values or proportion of area values.\n y_values: True positive rate values.\n plot_type: Plot type. Can be either: \"success_rate\", \"prediction_rate\" or \"roc\".\n\n Returns:\n Success rate, prediction rate or ROC plot figure object.\n\n Raises:\n InvalidParameterValueException: Invalid plot type.\n InvalidParameterValueException: x_values or y_values are out of bounds.\n \"\"\"\n if plot_type == \"success_rate\":\n label = \"Success rate\"\n xlab = \"Proportion of area\"\n elif plot_type == \"prediction_rate\":\n label = \"Prediction rate\"\n xlab = \"Proportion of area\"\n elif plot_type == \"roc\":\n label = \"ROC\"\n xlab = \"False positive rate\"\n else:\n raise InvalidParameterValueException(\"Invalid plot type\")\n\n if x_values.max() > 1 or x_values.min() < 0:\n raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n if y_values.max() > 1 or y_values.min() < 0:\n raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n fig = _plot_rate_curve(x_values=x_values, y_values=y_values, label=label, xlab=xlab)\n\n return fig\n
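A usage sketch plotting a toy ROC curve (values invented):
import numpy as np\nfrom eis_toolkit.validation.plot_rate_curve import plot_rate_curve\n\nx_values = np.array([0.0, 0.2, 0.5, 1.0]) # false positive rate\ny_values = np.array([0.0, 0.6, 0.85, 1.0]) # true positive rate\n\nfig = plot_rate_curve(x_values=x_values, y_values=y_values, plot_type=\"roc\")\nfig.savefig(\"roc.png\")\n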
"},{"location":"vector_processing/cell_based_association/","title":"Cell-Based Association","text":""},{"location":"vector_processing/cell_based_association/#eis_toolkit.vector_processing.cell_based_association.cell_based_association","title":"cell_based_association(cell_size, geodata, output_path, column=None, subset_target_attribute_values=None, add_name=None, add_buffer=None)
","text":"Creation of CBA matrix.
Initializes a CBA matrix from a vector file. The mesh is calculated according to the geometries contained in this file and the size of cells. Allows adding multiple vector datasets to the matrix, based on targeted shapes and/or attributes.
Parameters:
Name Type Description Default cell_size
int
Size of the cells.
required geodata
List[GeoDataFrame]
GeoDataFrame to create the CBA matrix. Additional GeoDataFrame(s) can be provided to add to the CBA matrix.
required output_path
str
Name of the saved .tif file.
required column
Optional[List[str]]
Name of the column of interest. If no attribute is specified, then an artificial attribute is created representing the presence or absence of the geometries of this file for each cell of the CBA grid. A categorical attribute will generate as many (binary) columns in the CBA matrix as values considered of interest (dummification). See parameter subset_target_attribute_values. Additional column(s) can be provided for each added GeoDataFrame(s). None
subset_target_attribute_values
Optional[List[Union[None, list, str]]]
List of values of interest of the target attribute, in case a categorical target attribute has been specified. Allows filtering a subset of relevant values. Additional values can be provided for each added GeoDataFrame(s).
None
add_name
Optional[List[Union[str, None]]]
Name of the column(s) to add to the matrix.
None
add_buffer
Optional[List[Union[Number, bool]]]
Allows the use of a buffer around shapes before the intersection with CBA cells for the added GeoDataFrame(s). Minimizes border effects or allows increasing positive samples (i.e. cells with mineralization). The size of the buffer is computed using the CRS (if projected CRS in meters: value in meters).
None
Returns:
Type Description GeoDataFrame
The created CBA matrix.
Source code in eis_toolkit/vector_processing/cell_based_association.py
@beartype\ndef cell_based_association(\n cell_size: int,\n geodata: List[gpd.GeoDataFrame],\n output_path: str,\n column: Optional[List[str]] = None,\n subset_target_attribute_values: Optional[List[Union[None, list, str]]] = None,\n add_name: Optional[List[Union[str, None]]] = None,\n add_buffer: Optional[List[Union[Number, bool]]] = None,\n) -> gpd.GeoDataFrame:\n \"\"\"Creation of CBA matrix.\n\n Initializes a CBA matrix from a vector file. The mesh is calculated\n according to the geometries contained in this file and the size of cells.\n Allows adding multiple vector datasets to the matrix, based on targeted\n shapes and/or attributes.\n\n Args:\n cell_size: Size of the cells.\n geodata: GeoDataFrame to create the CBA matrix. Additional\n GeoDataFrame(s) can be provided to add to the CBA matrix.\n output_path: Name of the saved .tif file.\n column: Name of the column of interest. If no attribute is specified,\n then an artificial attribute is created representing the presence\n or absence of the geometries of this file for each cell of the CBA\n grid. A categorical attribute will generate as many (binary) columns\n in the CBA matrix as values considered of interest (dummification).\n See parameter <subset_target_attribute_values>. Additional\n column(s) can be provided for each added GeoDataFrame(s).\n subset_target_attribute_values: List of values of interest of the\n target attribute, in case a categorical target attribute has been\n specified. Allows filtering a subset of relevant values. Additional\n values can be provided for each added GeoDataFrame(s).\n add_name: Name of the column(s) to add to the matrix.\n add_buffer: Allows the use of a buffer around shapes before the\n intersection with CBA cells for the added GeoDataFrame(s). Minimizes\n border effects or allows increasing positive samples (i.e. cells\n with mineralization). 
The size of the buffer is computed using the\n CRS (if projected CRS in meters: value in meters).\n\n Returns:\n The created CBA matrix.\n \"\"\"\n\n # Swapping None to list values\n if column is None:\n column = [\"\"]\n if add_buffer is None:\n add_buffer = [False]\n\n # Consistency checks on input data\n for frame in geodata:\n if frame.empty:\n raise exceptions.EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if cell_size <= 0:\n raise exceptions.InvalidParameterValueException(\"Expected cell size to be positive and non-zero.\")\n\n add_buffer = [False if x == 0 else x for x in add_buffer]\n if any(num < 0 for num in add_buffer):\n raise exceptions.InvalidParameterValueException(\"Expected buffer value to be positive, null or False.\")\n\n for i, name in enumerate(column):\n if column[i] == \"\":\n if subset_target_attribute_values[i] is not None:\n raise exceptions.InvalidParameterValueException(\"Can't use subset of values if no column is targeted.\")\n elif column[i] not in geodata[i]:\n raise exceptions.InvalidColumnException(\"Targeted column not found in the GeoDataFrame.\")\n\n for i, subset in enumerate(subset_target_attribute_values):\n if subset is not None:\n for value in subset:\n if value not in geodata[i][column[i]].unique():\n raise exceptions.InvalidParameterValueException(\n \"Subset of value(s) not found in the targeted column.\"\n )\n\n # Computation\n for i, data in enumerate(geodata):\n if i == 0:\n # Initialization of the CBA matrix\n grid, cba = _init_from_vector_data(cell_size, geodata[0], column[0], subset_target_attribute_values[0])\n else:\n # If necessary, adding data to matrix\n cba = _add_layer(\n cba,\n grid,\n geodata[i],\n column[i],\n subset_target_attribute_values[i],\n add_name[i - 1],\n add_buffer[i - 1],\n )\n\n # Export\n _to_raster(cba, output_path)\n\n return cba\n
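A usage sketch (the file path, cell size, column name and attribute values are hypothetical):
import geopandas as gpd\nfrom eis_toolkit.vector_processing.cell_based_association import cell_based_association\n\ngeology = gpd.read_file(\"geology.gpkg\") # hypothetical polygon data\ncba = cell_based_association(\n cell_size=1000,\n geodata=[geology],\n output_path=\"cba.tif\",\n column=[\"LITHOLOGY\"], # assumed attribute name\n subset_target_attribute_values=[[\"granite\", \"gabbro\"]],\n)\n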
"},{"location":"vector_processing/distance_computation/","title":"Distance computation","text":""},{"location":"vector_processing/distance_computation/#eis_toolkit.vector_processing.distance_computation.distance_computation","title":"distance_computation(raster_profile, geometries)
","text":"Calculate distance from raster cell to nearest geometry.
Parameters:
Name Type Description Default raster_profile
Union[Profile, dict]
The raster profile of the raster in which the distances to the nearest geometry are determined.
required geometries
GeoDataFrame
The geometries to determine distance to.
required Returns:
Type Description ndarray
A 2D numpy array with the distances computed.
Source code in eis_toolkit/vector_processing/distance_computation.py
@beartype\ndef distance_computation(raster_profile: Union[profiles.Profile, dict], geometries: gpd.GeoDataFrame) -> np.ndarray:\n \"\"\"Calculate distance from raster cell to nearest geometry.\n\n Args:\n raster_profile: The raster profile of the raster in which the distances\n to the nearest geometry are determined.\n geometries: The geometries to determine distance to.\n\n Returns:\n A 2D numpy array with the distances computed.\n\n \"\"\"\n if raster_profile.get(\"crs\") != geometries.crs:\n raise exceptions.NonMatchingCrsException(\"Expected coordinate systems to match between raster and geometries. \")\n if geometries.shape[0] == 0:\n raise exceptions.EmptyDataFrameException(\"Expected GeoDataFrame to not be empty.\")\n\n raster_width = raster_profile.get(\"width\")\n raster_height = raster_profile.get(\"height\")\n\n if not isinstance(raster_width, int) or not isinstance(raster_height, int):\n raise exceptions.InvalidParameterValueException(\n f\"Expected raster_profile to contain integer width and height. {raster_profile}\"\n )\n\n raster_transform = raster_profile.get(\"transform\")\n\n if not isinstance(raster_transform, transform.Affine):\n raise exceptions.InvalidParameterValueException(\n f\"Expected raster_profile to contain an affine transformation. {raster_profile}\"\n )\n\n return _distance_computation(\n raster_width=raster_width, raster_height=raster_height, raster_transform=raster_transform, geometries=geometries\n )\n
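A usage sketch (file names are hypothetical; the raster profile and geometries must share a CRS):
import rasterio\nimport geopandas as gpd\nfrom eis_toolkit.vector_processing.distance_computation import distance_computation\n\ngeometries = gpd.read_file(\"faults.gpkg\") # hypothetical vector data\nwith rasterio.open(\"base_raster.tif\") as raster: # hypothetical raster\n distances = distance_computation(raster_profile=raster.profile, geometries=geometries)\n\nprint(distances.shape) # (height, width)\n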
"},{"location":"vector_processing/idw_interpolation/","title":"IDW","text":""},{"location":"vector_processing/idw_interpolation/#eis_toolkit.vector_processing.idw_interpolation.idw","title":"idw(geodataframe, target_column, resolution, extent=None, power=2)
","text":"Calculate inverse distance weighted (IDW) interpolation.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be interpolated.
required target_column
str
The column name with values for each geometry.
required resolution
Tuple[Number, Number]
The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).
required extent
Optional[Tuple[Number, Number, Number, Number]]
The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data.
None
power
Number
The value for determining the rate at which the weights decrease. As power increases, the weights for distant points decrease rapidly. Defaults to 2.
2
Returns:
Type Description Tuple[ndarray, dict]
Rasterized vector data and metadata.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Invalid resolution or target_column.
Source code in eis_toolkit/vector_processing/idw_interpolation.py
@beartype\ndef idw(\n geodataframe: gpd.GeoDataFrame,\n target_column: str,\n resolution: Tuple[Number, Number],\n extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n power: Number = 2,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Calculate inverse distance weighted (IDW) interpolation.\n\n Args:\n geodataframe: The vector dataframe to be interpolated.\n target_column: The column name with values for each geometry.\n resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n If None, calculate extent from the input vector data.\n power: The value for determining the rate at which the weights decrease.\n As power increases, the weights for distant points decrease rapidly.\n Defaults to 2.\n\n Returns:\n Rasterized vector data and metadata.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: Invalid resolution or target_column.\n \"\"\"\n\n if geodataframe.shape[0] == 0:\n raise EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n if target_column not in geodataframe.columns:\n raise InvalidParameterValueException(\n f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n )\n\n if resolution[0] <= 0 or resolution[1] <= 0:\n raise InvalidParameterValueException(\"Expected height and width greater than zero.\")\n\n interpolated_values, out_meta = _idw_interpolation(geodataframe, target_column, resolution, power, extent)\n\n return interpolated_values, out_meta\n
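A usage sketch (the file path and column name are hypothetical):
import geopandas as gpd\nfrom eis_toolkit.vector_processing.idw_interpolation import idw\n\npoints = gpd.read_file(\"samples.gpkg\") # hypothetical point data\ninterpolated_values, out_meta = idw(\n geodataframe=points,\n target_column=\"Cu_ppm\", # assumed column name\n resolution=(25, 25),\n power=2,\n)\n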
"},{"location":"vector_processing/kriging_interpolation/","title":"Kriging interpolation","text":""},{"location":"vector_processing/kriging_interpolation/#eis_toolkit.vector_processing.kriging_interpolation.kriging","title":"kriging(data, target_column, resolution, extent=None, variogram_model='linear', coordinates_type='geographic', method='ordinary')
","text":"Perform Kriging interpolation on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
GeoDataFrame containing the input data.
required target_column
str
The column name with values for each geometry.
required resolution
Tuple[Number, Number]
The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).
required extent
Optional[Tuple[Number, Number, Number, Number]]
The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data.
None
variogram_model
Literal[linear, power, gaussian, spherical, exponential]
Variogram model to be used. Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.
'linear'
coordinates_type
Literal[euclidean, geographic]
Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic'). Used only in ordinary kriging. Defaults to 'geographic'.
'geographic'
method
Literal[ordinary, universal]
Ordinary or universal kriging. Defaults to 'ordinary'.
'ordinary'
Returns:
Type Description Tuple[ndarray, dict]
Grid containing the interpolated values and metadata.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Target column name is invalid or resolution is not greater than zero.
Source code in eis_toolkit/vector_processing/kriging_interpolation.py
@beartype\ndef kriging(\n data: gpd.GeoDataFrame,\n target_column: str,\n resolution: Tuple[Number, Number],\n extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n variogram_model: Literal[\"linear\", \"power\", \"gaussian\", \"spherical\", \"exponential\"] = \"linear\",\n coordinates_type: Literal[\"euclidean\", \"geographic\"] = \"geographic\",\n method: Literal[\"ordinary\", \"universal\"] = \"ordinary\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"\n Perform Kriging interpolation on the input data.\n\n Args:\n data: GeoDataFrame containing the input data.\n target_column: The column name with values for each geometry.\n resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n If None, calculate extent from the input vector data.\n variogram_model: Variogram model to be used.\n Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.\n coordinates_type: Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic').\n Used only in ordinary kriging. Defaults to 'geographic'.\n method: Ordinary or universal kriging. Defaults to 'ordinary'.\n\n Returns:\n Grid containing the interpolated values and metadata.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: Target column name is invalid or resolution is not greater than zero.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if target_column not in data.columns:\n raise InvalidParameterValueException(\n f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n )\n\n if resolution[0] <= 0 or resolution[1] <= 0:\n raise InvalidParameterValueException(\"The resolution must be greater than zero.\")\n\n data_interpolated, out_meta = _kriging(\n data, target_column, resolution, extent, variogram_model, coordinates_type, method\n )\n\n return data_interpolated, out_meta\n
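A usage sketch (file path and column name are hypothetical):
import geopandas as gpd\nfrom eis_toolkit.vector_processing.kriging_interpolation import kriging\n\npoints = gpd.read_file(\"samples.gpkg\") # hypothetical point data\ndata_interpolated, out_meta = kriging(\n data=points,\n target_column=\"Cu_ppm\", # assumed column name\n resolution=(25, 25),\n variogram_model=\"spherical\",\n coordinates_type=\"euclidean\",\n method=\"ordinary\",\n)\n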
"},{"location":"vector_processing/rasterize_vector/","title":"Rasterize vector","text":""},{"location":"vector_processing/rasterize_vector/#eis_toolkit.vector_processing.rasterize_vector.rasterize_vector","title":"rasterize_vector(geodataframe, resolution=None, value_column=None, default_value=1.0, fill_value=0.0, base_raster_profile=None, buffer_value=None, merge_strategy='replace')
","text":"Transform vector data into raster data.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be rasterized.
required resolution
Optional[float]
The resolution i.e. cell size of the output raster. Optional if base_raster_profile is given.
None
value_column
Optional[str]
The column name with values for each geometry. If None, then default_value is used for all geometries.
None
default_value
float
Default value burned into raster cells based on geometries.
1.0
base_raster_profile
Optional[Union[Profile, dict]]
Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute grid.
None
fill_value
float
Value used outside the burned/rasterized geometry cells.
0.0
buffer_value
Optional[float]
For adding a buffer around passed geometries before rasterization.
None
merge_strategy
Literal[replace, add]
How to handle overlapping geometries. \"add\" causes overlapping geometries to add together the values while \"replace\" does not. Adding them together is the basis for density computations where the density can be calculated by using a default value of 1.0 and the sum in each cell is the count of intersecting geometries.
'replace'
Returns:
Type Description Tuple[ndarray, dict]
Rasterized vector data and metadata.
Source code in eis_toolkit/vector_processing/rasterize_vector.py
@beartype\ndef rasterize_vector(\n geodataframe: gpd.GeoDataFrame,\n resolution: Optional[float] = None,\n value_column: Optional[str] = None,\n default_value: float = 1.0,\n fill_value: float = 0.0,\n base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n buffer_value: Optional[float] = None,\n merge_strategy: Literal[\"replace\", \"add\"] = \"replace\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Transform vector data into raster data.\n\n Args:\n geodataframe: The vector dataframe to be rasterized.\n resolution: The resolution i.e. cell size of the output raster.\n Optional if base_raster_profile is given.\n value_column: The column name with values for each geometry.\n If None, then default_value is used for all geometries.\n default_value: Default value burned into raster cells based on geometries.\n base_raster_profile: Base raster profile\n to be used for determining the grid on which vectors are\n burned in. If None, the geometries and provided resolution\n value are used to compute grid.\n fill_value: Value used outside the burned/rasterized geometry cells.\n buffer_value: For adding a buffer around passed\n geometries before rasterization.\n merge_strategy: How to handle overlapping geometries.\n \"add\" causes overlapping geometries to add together the\n values while \"replace\" does not. Adding them together is the\n basis for density computations where the density can be\n calculated by using a default value of 1.0 and the sum in\n each cell is the count of intersecting geometries.\n\n Returns:\n Rasterized vector data and metadata.\n \"\"\"\n\n if geodataframe.shape[0] == 0:\n # Empty GeoDataFrame\n raise exceptions.EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n if resolution is None and base_raster_profile is None:\n raise exceptions.InvalidParameterValueException(\n \"Expected either resolution or base_raster_profile to be given.\"\n )\n if resolution is not None and resolution <= 0:\n raise exceptions.NumericValueSignException(\n f\"Expected a positive value resolution ({dict(resolution=resolution)})\"\n )\n if value_column is not None and value_column not in geodataframe.columns:\n raise exceptions.InvalidParameterValueException(\n f\"Expected value_column ({value_column}) to be contained in geodataframe columns.\"\n )\n if buffer_value is not None and buffer_value < 0:\n raise exceptions.NumericValueSignException(\n f\"Expected a positive buffer_value ({dict(buffer_value=buffer_value)})\"\n )\n\n if base_raster_profile is not None and not isinstance(base_raster_profile, (profiles.Profile, dict)):\n raise exceptions.InvalidParameterValueException(\n f\"Expected base_raster_profile ({type(base_raster_profile)}) to be dict or rasterio.profiles.Profile.\"\n )\n\n if buffer_value is not None:\n geodataframe = geodataframe.copy()\n geodataframe[\"geometry\"] = geodataframe[\"geometry\"].apply(lambda geom: geom.buffer(buffer_value))\n\n return _rasterize_vector(\n geodataframe=geodataframe,\n value_column=value_column,\n default_value=default_value,\n fill_value=fill_value,\n base_raster_profile=base_raster_profile,\n resolution=resolution,\n merge_alg=getattr(MergeAlg, merge_strategy),\n )\n
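A usage sketch (file name and parameter values are illustrative):
import geopandas as gpd\nfrom eis_toolkit.vector_processing.rasterize_vector import rasterize_vector\n\ngeodataframe = gpd.read_file(\"deposits.gpkg\") # hypothetical vector data\nout_array, out_meta = rasterize_vector(\n geodataframe=geodataframe,\n resolution=100.0,\n default_value=1.0,\n fill_value=0.0,\n buffer_value=250.0, # buffer geometries by 250 CRS units before burning in\n)\n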
"},{"location":"vector_processing/reproject_vector/","title":"Reproject vector","text":""},{"location":"vector_processing/reproject_vector/#eis_toolkit.vector_processing.reproject_vector.reproject_vector","title":"reproject_vector(geodataframe, target_crs)
","text":"Reprojects vector data to match given coordinate reference system (EPSG).
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be reprojected.
required target_crs
int
Target CRS as an EPSG code.
required Returns:
Type Description GeoDataFrame
Reprojected vector data.
Raises:
Type Description MatchingCrsException
Vector data is already in the target CRS.
Source code in eis_toolkit/vector_processing/reproject_vector.py
@beartype\ndef reproject_vector(geodataframe: geopandas.GeoDataFrame, target_crs: int) -> geopandas.GeoDataFrame:\n \"\"\"Reprojects vector data to match the given coordinate reference system (EPSG).\n\n Args:\n geodataframe: The vector dataframe to be reprojected.\n target_crs: Target CRS as an EPSG code.\n\n Returns:\n Reprojected vector data.\n\n Raises:\n MatchingCrsException: Vector data is already in the target CRS.\n \"\"\"\n\n if geodataframe.crs.to_epsg() == target_crs:\n raise MatchingCrsException(\"Vector data is already in the target CRS.\")\n\n reprojected_gdf = geodataframe.to_crs(\"epsg:\" + str(target_crs))\n return reprojected_gdf\n
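A usage sketch (file name is hypothetical; EPSG:3067 is used as an example target CRS):
import geopandas\nfrom eis_toolkit.vector_processing.reproject_vector import reproject_vector\n\ngdf = geopandas.read_file(\"data.gpkg\") # hypothetical vector data\nreprojected_gdf = reproject_vector(geodataframe=gdf, target_crs=3067) # ETRS89 / TM35FIN\n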
"},{"location":"vector_processing/vector_density/","title":"Vector density","text":""},{"location":"vector_processing/vector_density/#eis_toolkit.vector_processing.vector_density.vector_density","title":"vector_density(geodataframe, resolution=None, base_raster_profile=None, buffer_value=None, statistic='density')
","text":"Compute density of geometries within raster.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The dataframe with vectors of which density is computed.
required resolution
Optional[float]
The resolution i.e. cell size of the output raster. Optional if base_raster_profile is given.
None
base_raster_profile
Optional[Union[Profile, dict]]
Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute grid.
None
buffer_value
Optional[float]
For adding a buffer around passed geometries before computing density.
None
statistic
Literal[density, count]
The statistic to compute. Either 'density' (geometry counts normalized by the maximum count) or 'count' (raw geometry counts). Defaults to 'density'.
'density'
Returns:
Type Description Tuple[ndarray, dict]
Computed density of vector data and metadata.
Source code in eis_toolkit/vector_processing/vector_density.py
@beartype\ndef vector_density(\n geodataframe: gpd.GeoDataFrame,\n resolution: Optional[float] = None,\n base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n buffer_value: Optional[float] = None,\n statistic: Literal[\"density\", \"count\"] = \"density\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Compute density of geometries within raster.\n\n Args:\n geodataframe: The dataframe with vectors\n of which density is computed.\n resolution: The resolution i.e. cell size of the output raster.\n Optional if base_raster_profile is given.\n base_raster_profile: Base raster profile\n to be used for determining the grid on which vectors are\n burned in. If None, the geometries and provided resolution\n value are used to compute grid.\n buffer_value: For adding a buffer around passed\n geometries before computing density.\n statistic: The statistic to compute. Either \"density\" (geometry\n counts normalized by the maximum count) or \"count\" (raw\n geometry counts). Defaults to \"density\".\n\n Returns:\n Computed density of vector data and metadata.\n \"\"\"\n out_raster_array, out_metadata = rasterize_vector(\n geodataframe=geodataframe,\n resolution=resolution,\n base_raster_profile=base_raster_profile,\n buffer_value=buffer_value,\n value_column=None,\n default_value=1.0,\n fill_value=0.0,\n merge_strategy=\"add\",\n )\n max_count = np.max(out_raster_array)\n if statistic == \"count\" or np.isclose(max_count, 0.0):\n return out_raster_array, out_metadata\n else:\n return (out_raster_array / max_count), out_metadata\n
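A usage sketch (file name and parameter values are illustrative):
import geopandas as gpd\nfrom eis_toolkit.vector_processing.vector_density import vector_density\n\ngeodataframe = gpd.read_file(\"occurrences.gpkg\") # hypothetical point data\ndensity, out_meta = vector_density(\n geodataframe=geodataframe,\n resolution=500.0,\n buffer_value=1000.0,\n statistic=\"density\", # or \"count\" for raw counts\n)\n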
"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"General","text":"This is the documentation site of the eis_toolkit python package. Here you can find documentation for each module. The documentation is automatically generated from docstrings.
Development of eis_toolkit is related to EIS Horizon EU project.
"},{"location":"dependency_licenses/","title":"Dependency licenses","text":"Name Version License protobuf 3.19.4 3-Clause BSD License tensorboard-plugin-wit 1.8.1 Apache 2.0 absl-py 1.2.0 Apache Software License flatbuffers 1.12 Apache Software License ghp-import 2.1.0 Apache Software License google-auth 2.11.0 Apache Software License google-auth-oauthlib 0.4.6 Apache Software License google-pasta 0.2.0 Apache Software License grpcio 1.48.1 Apache Software License importlib-metadata 4.12.0 Apache Software License keras 2.9.0 Apache Software License libclang 14.0.6 Apache Software License requests 2.28.1 Apache Software License rsa 4.9 Apache Software License tenacity 8.2.2 Apache Software License tensorboard 2.9.1 Apache Software License tensorboard-data-server 0.6.1 Apache Software License tensorflow 2.9.2 Apache Software License tensorflow-estimator 2.9.0 Apache Software License tensorflow-io-gcs-filesystem 0.26.0 Apache Software License watchdog 2.1.9 Apache Software License packaging 21.3 Apache Software License; BSD License python-dateutil 2.8.2 Apache Software License; BSD License affine 2.3.1 BSD cligj 0.7.2 BSD geopandas 0.11.1 BSD Fiona 1.8.21 BSD License Jinja2 3.1.2 BSD License Markdown 3.3.7 BSD License MarkupSafe 2.1.1 BSD License Pygments 2.13.0 BSD License Shapely 1.8.4 BSD License Werkzeug 2.2.2 BSD License astunparse 1.6.3 BSD License click 8.1.3 BSD License click-plugins 1.1.1 BSD License cycler 0.11.0 BSD License gast 0.4.0 BSD License h5py 3.7.0 BSD License idna 3.3 BSD License joblib 1.1.0 BSD License kiwisolver 1.4.4 BSD License mkdocs 1.3.1 BSD License numpy 1.23.2 BSD License oauthlib 3.2.0 BSD License pandas 1.4.4 BSD License patsy 0.5.2 BSD License pyasn1 0.4.8 BSD License pyasn1-modules 0.2.8 BSD License rasterio 1.3.2 BSD License requests-oauthlib 1.3.1 BSD License scikit-learn 1.1.2 BSD License scipy 1.9.1 BSD License statsmodels 0.13.2 BSD License threadpoolctl 3.1.0 BSD License wrapt 1.14.1 BSD License eis-toolkit 0.1.0 European Union Public Licence 1.2 (EUPL 1.2) Pillow 9.2.0 Historical Permission Notice and Disclaimer (HPND) opt-einsum 3.3.0 MIT snuggs 1.4.7 MIT GDAL 3.4.3 MIT License Keras-Preprocessing 1.1.2 MIT License PyYAML 6.0 MIT License attrs 22.1.0 MIT License cachetools 5.2.0 MIT License charset-normalizer 2.1.1 MIT License fonttools 4.37.1 MIT License mergedeep 1.3.4 MIT License mkdocs-material 8.4.2 MIT License mkdocs-material-extensions 1.0.3 MIT License munch 2.5.0 MIT License plotly 5.14.0 MIT License pymdown-extensions 9.5 MIT License pyparsing 3.0.9 MIT License pyproj 3.3.1 MIT License pytz 2022.2.1 MIT License pyyaml_env_tag 0.1 MIT License setuptools-scm 6.4.2 MIT License six 1.16.0 MIT License termcolor 1.1.0 MIT License tomli 2.0.1 MIT License urllib3 1.26.12 MIT License zipp 3.8.1 MIT License certifi 2022.6.15 Mozilla Public License 2.0 (MPL 2.0) matplotlib 3.5.3 Python Software Foundation License typing_extensions 4.3.0 Python Software Foundation License"},{"location":"conversions/csv_to_geodataframe/","title":"Convert csv to geodataframe","text":""},{"location":"conversions/csv_to_geodataframe/#eis_toolkit.conversions.csv_to_geodataframe.csv_to_geodataframe","title":"csv_to_geodataframe(csv, indexes, target_crs)
","text":"Read CSV file to a GeoDataFrame.
Usage of a single index expects a valid WKT geometry. Usage of two indexes expects POINT feature(s), with the X-coordinate as the first index and the Y-coordinate as the second index.
Parameters:
Name Type Description Default csv
Path
Path to the .csv file to be read.
required indexes
Sequence[int]
Index(es) of the geometry column(s).
required target_crs
int
Target CRS as an EPSG code.
required Returns:
Type Description GeoDataFrame
CSV file read to a GeoDataFrame.
Source code in eis_toolkit/conversions/csv_to_geodataframe.py
@beartype\ndef csv_to_geodataframe(\n csv: Path,\n indexes: Sequence[int],\n target_crs: int,\n) -> geopandas.GeoDataFrame:\n \"\"\"\n Read CSV file to a GeoDataFrame.\n\n Usage of single index expects valid WKT geometry.\n Usage of two indexes expects POINT feature(s) X-coordinate as the first index and Y-coordinate as the second index.\n\n Args:\n csv: Path to the .csv file to be read.\n indexes: Index(es) of the geometry column(s).\n target_crs: Target CRS as an EPSG code.\n\n Returns:\n CSV file read to a GeoDataFrame.\n \"\"\"\n\n data_frame = _csv_to_geodataframe(\n csv=csv,\n indexes=indexes,\n target_crs=target_crs,\n )\n return data_frame\n
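A usage sketch (the CSV path and column indexes are hypothetical):
from pathlib import Path\nfrom eis_toolkit.conversions.csv_to_geodataframe import csv_to_geodataframe\n\n# Two indexes: X coordinate in column 1 and Y coordinate in column 2\ngdf = csv_to_geodataframe(csv=Path(\"points.csv\"), indexes=[1, 2], target_crs=4326)\n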
"},{"location":"conversions/raster_to_dataframe/","title":"Convert raster to dataframe","text":""},{"location":"conversions/raster_to_dataframe/#eis_toolkit.conversions.raster_to_dataframe.raster_to_dataframe","title":"raster_to_dataframe(raster, bands=None, add_coordinates=False)
","text":"Convert raster to Pandas DataFrame.
If bands are not given, all bands are used for conversion. Selected bands are named based on their index, e.g. band_1, band_2, ..., band_n. If wanted, image coordinates (row, col) for each pixel can be written to the dataframe by setting add_coordinates to True.
Parameters:
Name Type Description Default raster
DatasetReader
Raster to be converted.
required bands
Optional[Sequence[int]]
Selected bands from multiband raster. Indexing begins from one. Defaults to None.
None
add_coordinates
bool
Determines if pixel coordinates are written into dataframe. Defaults to False.
False
Returns:
Type Description DataFrame
Raster converted to a DataFrame.
Source code in eis_toolkit/conversions/raster_to_dataframe.py
@beartype\ndef raster_to_dataframe(\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n add_coordinates: bool = False,\n) -> pd.DataFrame:\n \"\"\"Convert raster to Pandas DataFrame.\n\n If bands are not given, all bands are used for conversion. Selected bands are named based on their index e.g.,\n band_1, band_2,...,band_n. If wanted, image coordinates (row, col) for each pixel can be written to\n dataframe by setting add_coordinates to True.\n\n Args:\n raster: Raster to be converted.\n bands: Selected bands from multiband raster. Indexing begins from one. Defaults to None.\n add_coordinates: Determines if pixel coordinates are written into dataframe. Defaults to False.\n\n Returns:\n Raster converted to a DataFrame.\n \"\"\"\n\n data_frame = _raster_to_dataframe(\n raster=raster,\n bands=bands,\n add_coordinates=add_coordinates,\n )\n return data_frame\n
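A usage sketch (the raster path is hypothetical):
import rasterio\nfrom eis_toolkit.conversions.raster_to_dataframe import raster_to_dataframe\n\nwith rasterio.open(\"multiband.tif\") as raster: # hypothetical raster\n df = raster_to_dataframe(raster=raster, bands=[1, 2], add_coordinates=True)\n\nprint(df.columns) # band_1, band_2 plus pixel coordinate columns\n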
"},{"location":"exploratory_analyses/dbscan/","title":"DBSCAN","text":""},{"location":"exploratory_analyses/dbscan/#eis_toolkit.exploratory_analyses.dbscan.dbscan","title":"dbscan(data, max_distance=0.5, min_samples=5)
","text":"Perform DBSCAN clustering on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
GeoDataFrame containing the input data.
required max_distance
float
The maximum distance between two samples for one to be considered as in the neighborhood of the other. Defaults to 0.5.
0.5
min_samples
int
The number of samples in a neighborhood for a point to be considered as a core point. Defaults to 5.
5
Returns:
Type Description GeoDataFrame
GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a point is a core point (1) or not (0).
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
The maximum distance between two samples in a neighborhood is not greater than zero or the number of samples in a neighborhood is not greater than one.
Source code in eis_toolkit/exploratory_analyses/dbscan.py
@beartype\ndef dbscan(data: gdp.GeoDataFrame, max_distance: float = 0.5, min_samples: int = 5) -> gdp.GeoDataFrame:\n \"\"\"\n Perform DBSCAN clustering on the input data.\n\n Args:\n data: GeoDataFrame containing the input data.\n max_distance: The maximum distance between two samples for one to be considered as in the neighborhood of\n the other. Defaults to 0.5.\n min_samples: The number of samples in a neighborhood for a point to be considered as a core point.\n Defaults to 5.\n\n Returns:\n GeoDataFrame containing two new columns: one with assigned cluster labels and one indicating whether a\n point is a core point (1) or not (0).\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: The maximum distance between two samples in a neighborhood is not greater\n than zero or the number of samples in a neighborhood is not greater than one.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if max_distance <= 0:\n raise InvalidParameterValueException(\n \"The input value for the maximum distance between two samples in a neighborhood must be greater than zero.\"\n )\n\n if min_samples <= 1:\n raise InvalidParameterValueException(\n \"The input value for the minimum number of samples in a neighborhood must be greater than one.\"\n )\n\n dbscan_gdf = _dbscan(data, max_distance, min_samples)\n\n return dbscan_gdf\n
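A usage sketch (file name and parameter values are illustrative):
import geopandas as gpd\nfrom eis_toolkit.exploratory_analyses.dbscan import dbscan\n\ndata = gpd.read_file(\"points.gpkg\") # hypothetical point data\nclustered = dbscan(data=data, max_distance=0.5, min_samples=5)\n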
"},{"location":"exploratory_analyses/descriptive_statistics/","title":"Descriptive statistics","text":""},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_dataframe","title":"descriptive_statistics_dataframe(input_data, column)
","text":"Generate descriptive statistics from vector data.
Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
Parameters:
Name Type Description Default input_data
Union[DataFrame, GeoDataFrame]
Data to generate descriptive statistics from.
required column
str
Specify the column to generate descriptive statistics from.
required Returns:
Type Description dict
The descriptive statistics in the order described above.
Raises:
Type Description InvalidColumnException
The selected column is not found in the input data.
Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
@beartype\ndef descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:\n \"\"\"Generate descriptive statistics from vector data.\n\n Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and\n skewness.\n\n Args:\n input_data: Data to generate descriptive statistics from.\n column: Specify the column to generate descriptive statistics from.\n\n Returns:\n The descriptive statistics in the order described above.\n\n Raises:\n InvalidColumnException: The selected column is not found in the input data.\n \"\"\"\n if column not in input_data.columns:\n raise InvalidColumnException(f\"Column '{column}' was not found in the input data.\")\n data = input_data[column]\n statistics = _descriptive_statistics(data)\n return statistics\n
"},{"location":"exploratory_analyses/descriptive_statistics/#eis_toolkit.exploratory_analyses.descriptive_statistics.descriptive_statistics_raster","title":"descriptive_statistics_raster(input_data)
","text":"Generate descriptive statistics from raster data.
Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and skewness. Nodata values are removed from the data before the statistics are computed.
Parameters:
Name Type Description Default input_data
DatasetReader
Data to generate descriptive statistics from.
required Returns:
Type Description dict
The descriptive statistics in the order described above.
Source code in eis_toolkit/exploratory_analyses/descriptive_statistics.py
@beartype\ndef descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:\n \"\"\"Generate descriptive statistics from raster data.\n\n Generates min, max, mean, quantiles (25%, 50% and 75%), standard deviation, relative standard deviation and\n skewness. Nodata values are removed from the data before the statistics are computed.\n\n Args:\n input_data: Data to generate descriptive statistics from.\n\n Returns:\n The descriptive statistics in the order described above.\n \"\"\"\n data = input_data.read().flatten()\n nodata_value = input_data.nodata\n data = data[data != nodata_value]\n statistics = _descriptive_statistics(data)\n return statistics\n
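A usage sketch covering both variants (file paths and the column name are hypothetical):
import rasterio\nimport geopandas as gpd\nfrom eis_toolkit.exploratory_analyses.descriptive_statistics import (\n descriptive_statistics_dataframe,\n descriptive_statistics_raster,\n)\n\ngdf = gpd.read_file(\"samples.gpkg\") # hypothetical vector data\nvector_stats = descriptive_statistics_dataframe(input_data=gdf, column=\"Cu_ppm\") # assumed column\n\nwith rasterio.open(\"evidence_layer.tif\") as raster: # hypothetical raster\n raster_stats = descriptive_statistics_raster(input_data=raster)\n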
"},{"location":"exploratory_analyses/feature_importance/","title":"Feature importance","text":""},{"location":"exploratory_analyses/feature_importance/#eis_toolkit.exploratory_analyses.feature_importance.evaluate_feature_importance","title":"evaluate_feature_importance(classifier, x_test, y_test, feature_names, number_of_repetition=50, random_state=0)
","text":"Evaluate the feature importance of a sklearn classifier or linear model.
Parameters:
Name Type Description Default classifier
BaseEstimator
Trained classifier.
required x_test
ndarray
Testing feature data (X data need to be normalized / standardized).
required y_test
ndarray
Testing target data.
required feature_names
Sequence[str]
Names of the feature columns.
required number_of_repetition
int
Number of iterations used when calculating feature importance. Defaults to 50.
50
random_state
int
Random state for repeatability of results. Defaults to 0.
0
Returns:
Type Description tuple[DataFrame, dict]
A DataFrame of feature names and importance values, and the permutation importance result object (importance mean, importance std, and raw importances).
Raises:
Type Description InvalidDatasetException
The testing feature or target data is None.
Source code in eis_toolkit/exploratory_analyses/feature_importance.py
@beartype\ndef evaluate_feature_importance(\n classifier: sklearn.base.BaseEstimator,\n x_test: np.ndarray,\n y_test: np.ndarray,\n feature_names: Sequence[str],\n number_of_repetition: int = 50,\n random_state: int = 0,\n) -> tuple[pd.DataFrame, dict]:\n \"\"\"\n Evaluate the feature importance of a sklearn classifier or linear model.\n\n Args:\n classifier: Trained classifier.\n x_test: Testing feature data (X data need to be normalized / standardized).\n y_test: Testing target data.\n feature_names: Names of the feature columns.\n number_of_repetition: Number of iterations used when calculating feature importance. Defaults to 50.\n random_state: Random state for repeatability of results. Defaults to 0.\n\n Returns:\n A DataFrame of feature names and importance values.\n The permutation importance result object with importance mean, importance std, and raw importances.\n\n Raises:\n InvalidDatasetException: The testing feature or target data is None.\n \"\"\"\n\n if x_test is None or y_test is None:\n raise InvalidDatasetException\n\n result = permutation_importance(\n classifier, x_test, y_test.ravel(), n_repeats=number_of_repetition, random_state=random_state\n )\n\n feature_importance = pd.DataFrame({\"Feature\": feature_names, \"Importance\": result.importances_mean})\n\n feature_importance[\"Importance\"] = feature_importance[\"Importance\"] * 100\n feature_importance = feature_importance.sort_values(by=\"Importance\", ascending=False)\n\n return feature_importance, result\n
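A self-contained usage sketch with synthetic data (the toy dataset and model are illustrative):
import numpy as np\nfrom sklearn.datasets import make_classification\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom eis_toolkit.exploratory_analyses.feature_importance import evaluate_feature_importance\n\nX, y = make_classification(n_samples=200, n_features=4, random_state=0)\nx_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)\nclassifier = LogisticRegression().fit(x_train, y_train)\n\nfeature_importance, result = evaluate_feature_importance(\n classifier=classifier,\n x_test=x_test,\n y_test=y_test,\n feature_names=[\"f1\", \"f2\", \"f3\", \"f4\"],\n)\nprint(feature_importance)\n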
"},{"location":"exploratory_analyses/k_means_cluster/","title":"K-means clustering","text":""},{"location":"exploratory_analyses/k_means_cluster/#eis_toolkit.exploratory_analyses.k_means_cluster.k_means_clustering","title":"k_means_clustering(data, number_of_clusters=None, random_state=None)
","text":"Perform k-means clustering on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
A GeoDataFrame containing the input data.
required number_of_clusters
Optional[int]
The number of clusters (>= 1) to form. Optional parameter. If not provided, optimal number of clusters is computed using the elbow method.
None
random_state
Optional[int]
A random number generation for centroid initialization to make the randomness deterministic. Optional parameter.
None
Returns:
Type Description GeoDataFrame
GeoDataFrame containing assigned cluster labels.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
The number of clusters is less than one.
Source code in eis_toolkit/exploratory_analyses/k_means_cluster.py
@beartype\ndef k_means_clustering(\n data: gdp.GeoDataFrame, number_of_clusters: Optional[int] = None, random_state: Optional[int] = None\n) -> gdp.GeoDataFrame:\n \"\"\"\n Perform k-means clustering on the input data.\n\n Args:\n data: A GeoDataFrame containing the input data.\n number_of_clusters: The number of clusters (>= 1) to form. Optional parameter. If not provided,\n optimal number of clusters is computed using the elbow method.\n random_state: A random number generation for centroid initialization to make\n the randomness deterministic. Optional parameter.\n\n Returns:\n GeoDataFrame containing assigned cluster labels.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: The number of clusters is less than one.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if number_of_clusters is not None and number_of_clusters < 1:\n raise InvalidParameterValueException(\"The input value for number of clusters must be at least one.\")\n\n k_means_gdf = _k_means_clustering(data, number_of_clusters, random_state)\n\n return k_means_gdf\n
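A usage sketch (file name and cluster count are illustrative):
import geopandas as gpd\nfrom eis_toolkit.exploratory_analyses.k_means_cluster import k_means_clustering\n\ndata = gpd.read_file(\"points.gpkg\") # hypothetical point data\nk_means_gdf = k_means_clustering(data=data, number_of_clusters=5, random_state=0)\n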
"},{"location":"exploratory_analyses/parallel_coordinates/","title":"Plot parallel coordinates","text":""},{"location":"exploratory_analyses/parallel_coordinates/#eis_toolkit.exploratory_analyses.parallel_coordinates.plot_parallel_coordinates","title":"plot_parallel_coordinates(df, color_column_name, plot_title=None, palette_name=None, curved_lines=True)
","text":"Plot a parallel coordinates plot.
Automatically removes all rows containing null/nan values. Tries to convert columns to numeric to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only the first 8 to plot.
Parameters:
Name Type Description Default df
DataFrame
The DataFrame to plot.
required color_column_name
str
The name of the column in df to use for color encoding.
required plot_title
Optional[str]
The title for the plot. Default is None.
None
palette_name
Optional[str]
The name of the color palette to use. Default is None.
None
curved_lines
bool
If True, the plot will have curved instead of straight lines. Default is True.
True
Returns:
Type Description Figure
A matplotlib figure containing the parallel coordinates plot.
Raises:
Type Description EmptyDataFrameException
Raised when the DataFrame is empty.
InvalidColumnException
Raised when the color column is not found in the DataFrame.
InconsistentDataTypesException
Raised when the color column has multiple data types.
Source code in eis_toolkit/exploratory_analyses/parallel_coordinates.py
@beartype\ndef plot_parallel_coordinates(\n df: pd.DataFrame,\n color_column_name: str,\n plot_title: Optional[str] = None,\n palette_name: Optional[str] = None,\n curved_lines: bool = True,\n) -> matplotlib.figure.Figure:\n \"\"\"Plot a parallel coordinates plot.\n\n Automatically removes all rows containing null/nan values. Tries to convert columns to numeric\n to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only\n the first 8 to plot.\n\n Args:\n df: The DataFrame to plot.\n color_column_name: The name of the column in df to use for color encoding.\n plot_title: The title for the plot. Default is None.\n palette_name: The name of the color palette to use. Default is None.\n curved_lines: If True, the plot will have curved instead of straight lines. Default is True.\n\n Returns:\n A matplotlib figure containing the parallel coordinates plot.\n\n Raises:\n EmptyDataFrameException: Raised when the DataFrame is empty.\n InvalidColumnException: Raised when the color column is not found in the DataFrame.\n InconsistentDataTypesException: Raised when the color column has multiple data types.\n \"\"\"\n\n if df.empty:\n raise exceptions.EmptyDataFrameException(\"The input DataFrame is empty.\")\n\n if color_column_name not in df.columns:\n raise exceptions.InvalidColumnException(\n f\"The provided color column {color_column_name} is not found in the DataFrame.\"\n )\n\n df = df.convert_dtypes()\n df = df.apply(pd.to_numeric, errors=\"ignore\")\n\n color_data = df[color_column_name].to_numpy()\n if len(set([type(elem) for elem in color_data])) != 1:\n raise exceptions.InconsistentDataTypesException(\n \"The color column should have a consistent datatype. Multiple data types detected in the color column.\"\n )\n\n df = df.select_dtypes(include=np.number)\n\n # Drop non-numeric columns and the column used for coloring\n columns_to_drop = [color_column_name]\n for column in df.columns.values:\n if df[column].isnull().all():\n columns_to_drop.append(column)\n df = df.loc[:, ~df.columns.isin(columns_to_drop)]\n\n # Keep only first 8 columns if more are still present\n if len(df.columns.values) > 8:\n df = df.iloc[:, :8]\n\n data_labels = df.columns.values\n data = df.to_numpy()\n\n fig = _plot_parallel_coordinates(\n data=data,\n data_labels=data_labels,\n color_data=color_data,\n color_column_name=color_column_name,\n plot_title=plot_title,\n palette_name=palette_name,\n curved_lines=curved_lines,\n )\n return fig\n
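A minimal usage sketch (invented data for illustration):

import pandas as pd
from eis_toolkit.exploratory_analyses.parallel_coordinates import plot_parallel_coordinates

df = pd.DataFrame({
    "class": ["a", "a", "b", "b"],
    "v1": [1.0, 2.0, 8.0, 9.0],
    "v2": [3.0, 2.5, 0.5, 1.0],
})
fig = plot_parallel_coordinates(df, color_column_name="class")
fig.savefig("parallel_coordinates.png")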
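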
"},{"location":"exploratory_analyses/pca/","title":"PCA","text":""},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.compute_pca","title":"compute_pca(data, number_of_components, scaler_type='standard', nodata=None, color_column_name=None)
","text":"Compute given number of principal components for numeric input data.
Various input data formats are accepted and the output format depends on the input format. If input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can be specified in this case.
Parameters:
Name Type Description Default data
Union[ndarray, DataFrame, GeoDataFrame, DatasetReader]
Input data for PCA.
required number_of_components
int
The number of principal components to compute Should be >= 1 and at most the number of numeric columns if input is (Geo)DataFrame or number of bands if input is raster.
required scaler_type
Literal['standard', 'min_max', 'robust']
Transform data according to a specified Sklearn scaler. Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".
'standard'
nodata
Optional[Number]
Define nodata value to be masked out. Optional parameter. If None and input is raster, looks for nodata value from raster metadata. Defaults to None.
None
color_column_name
Optional[str]
If input data is a DataFrame or a GeoDataFrame, column name used for coloring data points in the produced pairplot can be defined. Defaults to None.
None
Returns:
Type Description Union[ndarray, Tuple[DataFrame, PairGrid], Tuple[GeoDataFrame, PairGrid], Tuple[ndarray, Profile]]
The computed principal components in corresponding format as the input data (for raster, output is
ndarray
Numpy array containing the data and raster profile) and the explained variance ratios for each component.
Raises:
Type Description EmptyDataException
The input is empty.
InvalidParameterValueException
The number of principal components is less than 1 or more than the number of columns if the input is a (Geo)DataFrame.
Source code in eis_toolkit/exploratory_analyses/pca.py
@beartype\ndef compute_pca(\n data: Union[np.ndarray, pd.DataFrame, gpd.GeoDataFrame, rasterio.io.DatasetReader],\n number_of_components: int,\n scaler_type: Literal[\"standard\", \"min_max\", \"robust\"] = \"standard\",\n nodata: Optional[Number] = None,\n color_column_name: Optional[str] = None,\n) -> Tuple[\n Union[\n np.ndarray,\n Tuple[pd.DataFrame, sns.PairGrid],\n Tuple[gpd.GeoDataFrame, sns.PairGrid],\n Tuple[np.ndarray, rasterio.profiles.Profile],\n ],\n np.ndarray,\n]:\n \"\"\"\n Compute given number of principal components for numeric input data.\n\n Various input data formats are accepted and the output format depends on the input format. If\n input is (Geo)DataFrame, a pairplot is produced additionally. A column name used for coloring can\n be specified in this case.\n\n Args:\n data: Input data for PCA.\n number_of_components: The number of principal components to compute. Should be >= 1 and at most\n the number of numeric columns if input is (Geo)DataFrame or the number of bands if input is a raster.\n scaler_type: Transform data according to a specified Sklearn scaler.\n Options are \"standard\", \"min_max\" and \"robust\". Defaults to \"standard\".\n nodata: Define nodata value to be masked out. Optional parameter. If None and input is raster, looks\n for nodata value from raster metadata. Defaults to None.\n color_column_name: If input data is a DataFrame or a GeoDataFrame, column name used for\n coloring data points in the produced pairplot can be defined. Defaults to None.\n\n Returns:\n The computed principal components in corresponding format as the input data (for raster, output is\n Numpy array containing the data and raster profile) and the explained variance ratios for each component.\n\n Raises:\n EmptyDataException: The input is empty.\n InvalidParameterValueException: The number of principal components is less than 1 or more than\n the number of columns if the input is a (Geo)DataFrame.\n \"\"\"\n if scaler_type not in SCALERS:\n raise exceptions.InvalidParameterValueException(f\"Invalid scaler. Choose from: {list(SCALERS.keys())}\")\n\n if number_of_components < 1:\n raise exceptions.InvalidParameterValueException(\"The number of principal components should be >= 1.\")\n\n # Get feature matrix (Numpy array) from various input types\n if isinstance(data, np.ndarray):\n feature_matrix = data\n if feature_matrix.ndim == 2: # Table-like data (assume it is a DataFrame transformed to Numpy array)\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=False)\n elif feature_matrix.ndim == 3: # Assume data represents multiband raster data\n rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n else:\n raise exceptions.InvalidParameterValueException(\n f\"Unsupported input data format. {feature_matrix.ndim} dimensions detected.\"\n )\n if feature_matrix.size == 0:\n raise exceptions.EmptyDataException(\"Input array is empty.\")\n\n elif isinstance(data, rasterio.io.DatasetReader):\n feature_matrix = data.read()\n if feature_matrix.ndim < 3:\n raise exceptions.InvalidParameterValueException(\"Input raster should have multiple bands.\")\n rows, cols = feature_matrix.shape[1], feature_matrix.shape[2]\n if nodata is None:\n nodata = data.nodata\n feature_matrix, nan_mask = _prepare_array_data(feature_matrix, nodata_value=nodata, reshape=True)\n\n elif isinstance(data, pd.DataFrame):\n df = data.copy()\n if df.empty:\n raise exceptions.EmptyDataException(\"Input DataFrame is empty.\")\n if number_of_components > len(df.columns):\n raise exceptions.InvalidParameterValueException(\n \"The number of principal components should be at most the number of numeric columns in the input DataFrame.\"\n )\n if color_column_name is not None:\n color_column_data = df[color_column_name]\n\n if isinstance(data, gpd.GeoDataFrame):\n geometries = data.geometry\n crs = data.crs\n df = df.drop(columns=[\"geometry\"])\n\n df = df.convert_dtypes()\n df = df.apply(pd.to_numeric, errors=\"ignore\")\n df = df.select_dtypes(include=np.number)\n df = df.astype(dtype=np.number)\n feature_matrix = df.to_numpy()\n feature_matrix = feature_matrix.astype(float)\n feature_matrix, nan_mask = _handle_missing_values(feature_matrix, nodata)\n\n # Core PCA computation\n principal_components, explained_variances = _compute_pca(feature_matrix, number_of_components, scaler_type)\n\n # Put nodata back in and consider new dimension of data\n if nodata is not None:\n principal_components[nan_mask[:, :number_of_components]] = nodata\n else:\n principal_components[nan_mask[:, :number_of_components]] = np.nan\n\n # Convert PCA output to proper format\n if isinstance(data, np.ndarray):\n if data.ndim == 3:\n result_data = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n else:\n result_data = principal_components\n\n elif isinstance(data, rasterio.io.DatasetReader):\n principal_components = principal_components.reshape(rows, cols, -1).transpose(2, 0, 1)\n out_profile = data.profile.copy()\n out_profile[\"count\"] = number_of_components\n out_profile[\"dtype\"] = \"float32\"\n result_data = (principal_components, out_profile)\n\n elif isinstance(data, pd.DataFrame):\n component_names = [f\"principal_component_{i+1}\" for i in range(number_of_components)]\n pca_df = pd.DataFrame(data=principal_components, columns=component_names)\n if color_column_name is not None:\n pca_df[color_column_name] = color_column_data\n sns_pair_grid = plot_pca(pca_df, explained_variances, color_column_name)\n if isinstance(data, gpd.GeoDataFrame):\n pca_df = gpd.GeoDataFrame(pca_df, geometry=geometries, crs=crs)\n result_data = (pca_df, sns_pair_grid)\n\n return result_data, explained_variances\n
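A minimal usage sketch for the ndarray input format (synthetic, nodata-free data for illustration):

import numpy as np
from eis_toolkit.exploratory_analyses.pca import compute_pca

rng = np.random.default_rng(0)
table = rng.normal(size=(100, 3))  # 2D array treated as table-like data

components, explained_variances = compute_pca(table, number_of_components=2)
print(components.shape)     # (100, 2)
print(explained_variances)  # explained variance ratio per component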
"},{"location":"exploratory_analyses/pca/#eis_toolkit.exploratory_analyses.pca.plot_pca","title":"plot_pca(pca_df, explained_variances=None, color_column_name=None, save_path=None)
","text":"Plot a scatter matrix of different principal component combinations.
Parameters:
Name Type Description Default pca_df
DataFrame
A DataFrame containing computed principal components.
required explained_variances
Optional[ndarray]
The explained variance ratios for each principal component. Used for labeling axes in the plot. Optional parameter. Defaults to None.
None
color_column_name
Optional[str]
Name of the column that will be used for color-coding data points. Typically a categorical variable in the original data. Optional parameter, no colors if not provided. Defaults to None.
None
save_path
Optional[str]
The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.
None
Returns:
Type Description PairGrid
A Seaborn pairgrid containing the PCA scatter matrix.
Raises:
Type Description InvalidColumnException
DataFrame does not contain the given color column.
Source code in eis_toolkit/exploratory_analyses/pca.py
@beartype\ndef plot_pca(\n pca_df: pd.DataFrame,\n explained_variances: Optional[np.ndarray] = None,\n color_column_name: Optional[str] = None,\n save_path: Optional[str] = None,\n) -> sns.PairGrid:\n \"\"\"Plot a scatter matrix of different principal component combinations.\n\n Args:\n pca_df: A DataFrame containing computed principal components.\n explained_variances: The explained variance ratios for each principal component. Used for labeling\n axes in the plot. Optional parameter. Defaults to None.\n color_column_name: Name of the column that will be used for color-coding data points. Typically a\n categorical variable in the original data. Optional parameter, no colors if not provided.\n Defaults to None.\n save_path: The save path for the plot. Optional parameter, no saving if not provided. Defaults to None.\n\n Returns:\n A Seaborn pairgrid containing the PCA scatter matrix.\n\n Raises:\n InvalidColumnException: DataFrame does not contain the given color column.\n \"\"\"\n\n if color_column_name and color_column_name not in pca_df.columns:\n raise exceptions.InvalidColumnException(\"DataFrame does not contain the given color column.\")\n\n pair_grid = sns.pairplot(pca_df, hue=color_column_name)\n\n # Add explained variances to axis labels if provided\n if explained_variances is not None:\n labels = [f\"PC {i+1} ({var:.1f}%)\" for i, var in enumerate(explained_variances * 100)]\n else:\n labels = [f\"PC {i+1}\" for i in range(len(pair_grid.axes))]\n\n # Iterate over axes objects and set the labels\n for i, ax_row in enumerate(pair_grid.axes):\n for j, ax in enumerate(ax_row):\n if j == 0: # Only the first column\n ax.set_ylabel(labels[i], fontsize=\"large\")\n if i == len(ax_row) - 1: # Only the last row\n ax.set_xlabel(labels[j], fontsize=\"large\")\n\n if save_path is not None:\n plt.savefig(save_path)\n\n return pair_grid\n
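A minimal usage sketch with a synthetic principal-component DataFrame (the explained variances are invented for illustration):

import numpy as np
import pandas as pd
from eis_toolkit.exploratory_analyses.pca import plot_pca

rng = np.random.default_rng(0)
pca_df = pd.DataFrame({
    "principal_component_1": rng.normal(size=50),
    "principal_component_2": rng.normal(size=50),
})
grid = plot_pca(pca_df, explained_variances=np.array([0.6, 0.3]))
grid.savefig("pca_scatter_matrix.png")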
"},{"location":"exploratory_analyses/statistical_testing/","title":"Statistical (hypothesis) testing","text":""},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.chi_square_test","title":"chi_square_test(data, target_column, columns=None)
","text":"Compute Chi-square test for independence on the input data.
It is assumed that the observations in the input data are independent of each other and that the variables are categorical, i.e. strings, booleans or integers, but not floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required target_column
str
Variable against which independence of other variables is tested.
required columns
Optional[Sequence[str]]
Variables that are tested against the variable in target_column. If None, every column is used.
None
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
The target_column is not in the input Dataframe or an invalid column is provided.
Returns:
Type Description dict
Test statistics for each variable (except target_column).
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef chi_square_test(data: pd.DataFrame, target_column: str, columns: Optional[Sequence[str]] = None) -> dict:\n \"\"\"Compute Chi-square test for independence on the input data.\n\n It is assumed that the observations in the input data are independent of each other and that the\n variables are categorical, i.e. strings, booleans or integers, but not floats.\n\n Args:\n data: Dataframe containing the input data.\n target_column: Variable against which independence of other variables is tested.\n columns: Variables that are tested against the variable in target_column. If None, every column is used.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: The target_column is not in the input Dataframe or an invalid column is provided.\n\n Returns:\n Test statistics for each variable (except target_column).\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if not check_columns_valid(data, [target_column]):\n raise exceptions.InvalidParameterValueException(\"Target column not found in the Dataframe.\")\n\n if columns is not None:\n invalid_columns = [column for column in columns if column not in data.columns]\n if any(invalid_columns):\n raise exceptions.InvalidParameterValueException(\n f\"The following variables are not in the dataframe: {invalid_columns}\"\n )\n else:\n columns = data.columns\n\n statistics = {}\n for column in columns:\n if column != target_column:\n contingency_table = pd.crosstab(data[target_column], data[column])\n chi_square, p_value, degrees_of_freedom, _ = chi2_contingency(contingency_table)\n statistics[column] = (chi_square, p_value, degrees_of_freedom)\n\n return statistics\n
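A minimal usage sketch (invented categorical data for illustration):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import chi_square_test

data = pd.DataFrame({
    "deposit": [1, 1, 0, 0, 1, 0],
    "lithology": ["granite", "granite", "basalt", "basalt", "basalt", "granite"],
})
stats = chi_square_test(data, target_column="deposit")
for column, (chi_square, p_value, dof) in stats.items():
    print(column, chi_square, p_value, dof)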
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.correlation_matrix","title":"correlation_matrix(data, correlation_method='pearson', min_periods=None)
","text":"Compute correlation matrix on the input data.
It is assumed that the data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required correlation_method
Literal[pearson, kendall, spearman]
'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.
'pearson'
min_periods
Optional[int]
Minimum number of observations required per pair of columns to have valid result. Optional.
None
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
min_periods argument is used with method 'kendall'.
Returns:
Type Description DataFrame
Dataframe containing the correlation matrix
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef correlation_matrix(\n data: pd.DataFrame,\n correlation_method: Literal[\"pearson\", \"kendall\", \"spearman\"] = \"pearson\",\n min_periods: Optional[int] = None,\n) -> pd.DataFrame:\n \"\"\"Compute correlation matrix on the input data.\n\n It is assumed that the data is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n correlation_method: 'pearson', 'kendall', or 'spearman'. Defaults to 'pearson'.\n min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: min_periods argument is used with method 'kendall'.\n\n Returns:\n Dataframe containing the correlation matrix\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if correlation_method == \"kendall\" and min_periods is not None:\n raise exceptions.InvalidParameterValueException(\n \"The argument min_periods is available only with correlation methods 'pearson' and 'spearman'.\"\n )\n\n matrix = data.corr(method=correlation_method, min_periods=min_periods, numeric_only=True)\n\n return matrix\n
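A minimal usage sketch (invented numeric data for illustration):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import correlation_matrix

data = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [2.1, 3.9, 6.2, 8.1]})
print(correlation_matrix(data, correlation_method="spearman"))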
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.covariance_matrix","title":"covariance_matrix(data, min_periods=None, delta_degrees_of_freedom=1)
","text":"Compute covariance matrix on the input data.
It is assumed that the data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required min_periods
Optional[int]
Minimum number of observations required per pair of columns to have valid result. Optional.
None
delta_degrees_of_freedom
int
Delta degrees of freedom used for computing covariance matrix. Defaults to 1.
1
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
InvalidParameterValueException
Provided value for delta_degrees_of_freedom is negative.
Returns:
Type Description DataFrame
Dataframe containing the covariance matrix
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef covariance_matrix(\n data: pd.DataFrame, min_periods: Optional[int] = None, delta_degrees_of_freedom: int = 1\n) -> pd.DataFrame:\n \"\"\"Compute covariance matrix on the input data.\n\n It is assumed that the data is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n min_periods: Minimum number of observations required per pair of columns to have valid result. Optional.\n delta_degrees_of_freedom: Delta degrees of freedom used for computing covariance matrix. Defaults to 1.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n InvalidParameterValueException: Provided value for delta_degrees_of_freedom is negative.\n\n Returns:\n Dataframe containing the covariance matrix\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n if delta_degrees_of_freedom < 0:\n raise exceptions.InvalidParameterValueException(\"Delta degrees of freedom must be non-negative.\")\n\n matrix = data.cov(min_periods=min_periods, ddof=delta_degrees_of_freedom)\n\n return matrix\n
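Usage mirrors correlation_matrix; a minimal sketch (invented numeric data):

import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import covariance_matrix

data = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [2.1, 3.9, 6.2, 8.1]})
print(covariance_matrix(data, delta_degrees_of_freedom=1))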
"},{"location":"exploratory_analyses/statistical_testing/#eis_toolkit.exploratory_analyses.statistical_tests.normality_test","title":"normality_test(data)
","text":"Compute Shapiro-Wilk test for normality on the input data.
The test's null hypothesis is that the data is normally distributed. It is assumed that the input data is numeric, i.e. integers or floats.
Parameters:
Name Type Description Default data
DataFrame
Dataframe containing the input data.
required Returns:
Type Description dict
Test statistics for each variable.
Raises:
Type Description EmptyDataFrameException
The input Dataframe is empty.
Source code in eis_toolkit/exploratory_analyses/statistical_tests.py
@beartype\ndef normality_test(data: pd.DataFrame) -> dict:\n \"\"\"Compute Shapiro-Wilk test for normality on the input data.\n\n The test's null hypothesis is that the data is normally distributed. It is assumed that the input data\n is numeric, i.e. integers or floats.\n\n Args:\n data: Dataframe containing the input data.\n\n Returns:\n Test statistics for each variable.\n\n Raises:\n EmptyDataFrameException: The input Dataframe is empty.\n \"\"\"\n if check_empty_dataframe(data):\n raise exceptions.EmptyDataFrameException(\"The input Dataframe is empty.\")\n\n statistics = {}\n for column in data.columns:\n statistic, p_value = shapiro(data[column])\n statistics[column] = (statistic, p_value)\n\n return statistics\n
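A minimal usage sketch (synthetic data; a large p-value means the normality null hypothesis is not rejected):

import numpy as np
import pandas as pd
from eis_toolkit.exploratory_analyses.statistical_tests import normality_test

data = pd.DataFrame({"x": np.random.default_rng(0).normal(size=30)})
statistic, p_value = normality_test(data)["x"]
print(statistic, p_value)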
"},{"location":"prediction/fuzzy_overlay/","title":"Fuzzy overlay","text":""},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.and_overlay","title":"and_overlay(data)
","text":"Compute an 'and' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef and_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute an 'and' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'and' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.min(axis=0)\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.gamma_overlay","title":"gamma_overlay(data, gamma)
","text":"Compute a 'gamma' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required gamma
float
The gamma parameter. With gamma value 0, result will be the same as 'product' overlay. When gamma is closer to 1, the weight of 'sum' overlay is increased. Value must be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values or gamma are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef gamma_overlay(data: np.ndarray, gamma: float) -> np.ndarray:\n \"\"\"Compute a 'gamma' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n gamma: The gamma parameter. With gamma value 0, result will be the same as 'product' overlay.\n When gamma is closer to 1, the weight of 'sum' overlay is increased.\n Value must be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'gamma' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values or gamma are not in range [0, 1].\n \"\"\"\n if gamma < 0 or gamma > 1:\n raise exceptions.InvalidParameterValueException(\"The gamma parameter must be in range [0, 1]\")\n\n sum = sum_overlay(data=data)\n product = product_overlay(data=data)\n return product ** (1 - gamma) * sum**gamma\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.or_overlay","title":"or_overlay(data)
","text":"Compute an 'or' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef or_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute an 'or' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'or' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.max(axis=0)\n
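To illustrate the pixel-wise min/max semantics shared by and_overlay and or_overlay, a small sketch with two one-row membership bands:

import numpy as np
from eis_toolkit.prediction.fuzzy_overlay import and_overlay, or_overlay

# Two fuzzy membership bands, each 1 row x 2 columns.
data = np.array([
    [[0.2, 0.8]],
    [[0.6, 0.4]],
])
print(and_overlay(data))  # [[0.2 0.4]] -- pixel-wise minimum
print(or_overlay(data))   # [[0.6 0.8]] -- pixel-wise maximum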
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.product_overlay","title":"product_overlay(data)
","text":"Compute a 'product' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef product_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute a 'product' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'product' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return np.prod(data, axis=0)\n
"},{"location":"prediction/fuzzy_overlay/#eis_toolkit.prediction.fuzzy_overlay.sum_overlay","title":"sum_overlay(data)
","text":"Compute a 'sum' overlay operation with fuzzy logic.
Parameters:
Name Type Description Default data
ndarray
The input data as a 3D Numpy array. Each 2D array represents a raster band. Data points should be in the range [0, 1].
required Returns:
Type Description ndarray
2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].
Raises:
Type Description InvalidParameterValueException
If data values are not in range [0, 1].
Source code in eis_toolkit/prediction/fuzzy_overlay.py
@beartype\ndef sum_overlay(data: np.ndarray) -> np.ndarray:\n \"\"\"Compute a 'sum' overlay operation with fuzzy logic.\n\n Args:\n data: The input data as a 3D Numpy array. Each 2D array represents a raster band.\n Data points should be in the range [0, 1].\n\n Returns:\n 2D Numpy array with the result of the 'sum' overlay operation. Values are in range [0, 1].\n\n Raises:\n InvalidParameterValueException: If data values are not in range [0, 1].\n \"\"\"\n _check_input_data(data=data)\n\n return data.sum(axis=0) - np.prod(data, axis=0)\n
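The algebraic overlays relate through the gamma formula product ** (1 - gamma) * sum ** gamma, so the endpoints reproduce product_overlay and sum_overlay exactly. A small sketch:

import numpy as np
from eis_toolkit.prediction.fuzzy_overlay import gamma_overlay, product_overlay, sum_overlay

data = np.array([
    [[0.2, 0.8], [0.5, 0.9]],
    [[0.6, 0.4], [0.5, 0.1]],
])  # shape (bands, rows, cols), values in [0, 1]

print(np.allclose(gamma_overlay(data, gamma=0.0), product_overlay(data)))  # True
print(np.allclose(gamma_overlay(data, gamma=1.0), sum_overlay(data)))      # True
print(gamma_overlay(data, gamma=0.5))  # compromise between product and sum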
"},{"location":"prediction/weights_of_evidence/","title":"Weights of evidence","text":""},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_responses","title":"weights_of_evidence_calculate_responses(output_arrays, nr_of_deposits, nr_of_pixels)
","text":"Calculate the posterior probabilities for the given generalized weight arrays.
Parameters:
Name Type Description Default output_arrays
Sequence[Dict[str, ndarray]]
List of output array dictionaries returned by weights of evidence calculations. For each dictionary, generalized weight and generalized standard deviation arrays are used and summed together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found, the W+ and S_W+ arrays are used (i.e. when outputs from unique weight calculations are used with this function).
required nr_of_deposits
int
Number of deposit pixels in the input data for weights of evidence calculations.
required nr_of_pixels
int
Number of evidence pixels in the input data for weights of evidence calculations.
required Returns:
Type Description ndarray
Array of posterior probabilities.
ndarray
Array of standard deviations in the posterior probability calculations.
ndarray
Array of confidence of the prospectivity values obtained in the posterior probability array.
Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_responses(\n output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int\n) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:\n \"\"\"Calculate the posterior probabilities for the given generalized weight arrays.\n\n Args:\n output_arrays: List of output array dictionaries returned by weights of evidence calculations.\n For each dictionary, generalized weight and generalized standard deviation arrays are used and summed\n together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,\n the W+ and S_W+ arrays are used (i.e. when outputs from unique weight calculations are used with this function).\n nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.\n nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.\n\n Returns:\n Array of posterior probabilities.\n Array of standard deviations in the posterior probability calculations.\n Array of confidence of the prospectivity values obtained in the posterior probability array.\n \"\"\"\n gen_weights_sum = sum(\n [\n item[GENERALIZED_WEIGHT_PLUS_COLUMN]\n if GENERALIZED_WEIGHT_PLUS_COLUMN in item.keys()\n else item[WEIGHT_PLUS_COLUMN]\n for item in output_arrays\n ]\n )\n gen_weights_variance_sum = sum(\n [\n np.square(item[GENERALIZED_S_WEIGHT_PLUS_COLUMN])\n if GENERALIZED_S_WEIGHT_PLUS_COLUMN in item.keys()\n else np.square(item[WEIGHT_S_PLUS_COLUMN])\n for item in output_arrays\n ]\n )\n\n prior_probabilities = nr_of_deposits / nr_of_pixels\n prior_odds = np.log(prior_probabilities / (1 - prior_probabilities))\n posterior_probabilities = np.exp(gen_weights_sum + prior_odds) / (1 + np.exp(gen_weights_sum + prior_odds))\n\n posterior_probabilities_squared = np.square(posterior_probabilities)\n posterior_probabilities_std = np.sqrt(\n (1 / nr_of_deposits + gen_weights_variance_sum) * posterior_probabilities_squared\n )\n\n confidence_array = posterior_probabilities / posterior_probabilities_std\n return posterior_probabilities, posterior_probabilities_std, confidence_array\n
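To make the posterior-probability step concrete, a standalone numeric sketch (invented numbers) that mirrors the body of the function:

import numpy as np

nr_of_deposits, nr_of_pixels = 10, 1000
gen_weights_sum = np.array([0.0, 1.2, -0.5])  # pixel-wise sum of generalized W+

prior_probability = nr_of_deposits / nr_of_pixels                  # 0.01
prior_logit = np.log(prior_probability / (1 - prior_probability))  # log prior odds
odds = np.exp(gen_weights_sum + prior_logit)
posterior = odds / (1 + odds)
print(posterior.round(4))  # approximately [0.01, 0.0324, 0.0061]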
"},{"location":"prediction/weights_of_evidence/#eis_toolkit.prediction.weights_of_evidence.weights_of_evidence_calculate_weights","title":"weights_of_evidence_calculate_weights(evidential_raster, deposits, raster_nodata=None, weights_type='unique', studentized_contrast_threshold=1, arrays_to_generate=None)
","text":"Calculate weights of spatial associations.
Parameters:
Name Type Description Default evidential_raster
DatasetReader
The evidential raster.
required deposits
GeoDataFrame
Vector data representing the mineral deposits or occurrences point data.
required raster_nodata
Optional[Number]
Nodata value of the raster, if it needs to be specified manually. Optional parameter, defaults to None (nodata from raster metadata is used).
None
weights_type
Literal[unique, categorical, ascending, descending]
Accepted values are 'unique', 'categorical', 'ascending' and 'descending'. Unique weights do not create generalized classes and do not use a studentized contrast threshold value while categorical, cumulative ascending and cumulative descending do. Categorical weights are calculated so that all classes with studentized contrast below the defined threshold are grouped into one generalized class. Cumulative ascending and descending weights find the class with max contrast and group classes above/below into generalized classes. Generalized weights are also calculated for generalized classes.
'unique'
studentized_contrast_threshold
Number
Studentized contrast threshold value used with 'categorical', 'ascending' and 'descending' weight types. Used either as reclassification threshold directly (categorical) or to check that class with max contrast has studentized contrast value at least the defined value (cumulative). Defaults to 1.
1
arrays_to_generate
Optional[Sequence[str]]
Arrays to generate from the computed weight metrics. All column names in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"] for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"] for the cumulative weight types.
None
Returns:
Type Description DataFrame
Dataframe with weights of spatial association between the input data.
dict
Dictionary of arrays for specified metrics.
dict
Raster metadata.
int
Number of deposit pixels.
int
Number of all evidence pixels.
Source code in eis_toolkit/prediction/weights_of_evidence.py
@beartype\ndef weights_of_evidence_calculate_weights(\n evidential_raster: rasterio.io.DatasetReader,\n deposits: gpd.GeoDataFrame,\n raster_nodata: Optional[Number] = None,\n weights_type: Literal[\"unique\", \"categorical\", \"ascending\", \"descending\"] = \"unique\",\n studentized_contrast_threshold: Number = 1,\n arrays_to_generate: Optional[Sequence[str]] = None,\n) -> Tuple[pd.DataFrame, dict, dict, int, int]:\n \"\"\"\n Calculate weights of spatial associations.\n\n Args:\n evidential_raster: The evidential raster.\n deposits: Vector data representing the mineral deposits or occurrences point data.\n raster_nodata: Nodata value of the raster, if it needs to be specified manually. Optional parameter, defaults to None\n (nodata from raster metadata is used).\n weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.\n Unique weights do not create generalized classes and do not use a studentized contrast threshold value\n while categorical, cumulative ascending and cumulative descending do. Categorical weights are calculated so\n that all classes with studentized contrast below the defined threshold are grouped into one generalized\n class. Cumulative ascending and descending weights find the class with max contrast and group classes\n above/below into generalized classes. Generalized weights are also calculated for generalized classes.\n studentized_contrast_threshold: Studentized contrast threshold value used with 'categorical', 'ascending' and\n 'descending' weight types. Used either as reclassification threshold directly (categorical) or to check\n that class with max contrast has studentized contrast value at least the defined value (cumulative).\n Defaults to 1.\n arrays_to_generate: Arrays to generate from the computed weight metrics. All column names\n in the produced weights_df are valid choices. Defaults to [\"Class\", \"W+\", \"S_W+\"]\n for \"unique\" weights_type and [\"Class\", \"W+\", \"S_W+\", \"Generalized W+\", \"Generalized S_W+\"]\n for the cumulative weight types.\n\n Returns:\n Dataframe with weights of spatial association between the input data.\n Dictionary of arrays for specified metrics.\n Raster metadata.\n Number of deposit pixels.\n Number of all evidence pixels.\n \"\"\"\n\n if arrays_to_generate is None:\n if weights_type == \"unique\":\n metrics_to_arrays = DEFAULT_METRICS_UNIQUE\n else:\n metrics_to_arrays = DEFAULT_METRICS_CUMULATIVE\n else:\n for col_name in arrays_to_generate:\n if col_name not in VALID_DF_COLUMNS:\n raise exceptions.InvalidColumnException(\n f\"Arrays to generate contains invalid metric / column name: {col_name}.\"\n )\n metrics_to_arrays = arrays_to_generate.copy()\n\n # 1. Preprocess data\n evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)\n raster_meta = evidential_raster.meta\n\n # Rasterize deposits\n deposit_array, _ = rasterize_vector(\n geodataframe=deposits, default_value=1.0, base_raster_profile=raster_meta, fill_value=0.0\n )\n\n # Mask NaN out of the array\n nodata_mask = np.isnan(evidence_array)\n masked_evidence_array = evidence_array[~nodata_mask]\n masked_deposit_array = deposit_array[~nodata_mask]\n\n # 2. WofE calculations\n if weights_type == \"unique\" or weights_type == \"categorical\":\n wofe_weights = _unique_weights(masked_deposit_array, masked_evidence_array)\n elif weights_type == \"ascending\":\n wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=True)\n elif weights_type == \"descending\":\n wofe_weights = _cumulative_weights(masked_deposit_array, masked_evidence_array, ascending=False)\n else:\n raise exceptions.InvalidParameterValueException(\n \"Expected weights_type to be one of unique, categorical, ascending or descending.\"\n )\n\n # 3. Create DataFrame based on calculated metrics\n df_entries = []\n for cls, metrics in wofe_weights.items():\n metrics = [round(metric, 4) if isinstance(metric, np.floating) else metric for metric in metrics]\n A, _, C, _, w_plus, s_w_plus, w_minus, s_w_minus, contrast, s_contrast, studentized_contrast = metrics\n df_entries.append(\n {\n CLASS_COLUMN: cls,\n PIXEL_COUNT_COLUMN: A + C,\n DEPOSIT_COUNT_COLUMN: A,\n WEIGHT_PLUS_COLUMN: w_plus,\n WEIGHT_S_PLUS_COLUMN: s_w_plus,\n WEIGHT_MINUS_COLUMN: w_minus,\n WEIGHT_S_MINUS_COLUMN: s_w_minus,\n CONTRAST_COLUMN: contrast,\n S_CONTRAST_COLUMN: s_contrast,\n STUDENTIZED_CONTRAST_COLUMN: studentized_contrast,\n }\n )\n weights_df = pd.DataFrame(df_entries)\n\n # 4. If we use cumulative weights type, calculate generalized classes and weights\n if weights_type == \"categorical\":\n weights_df = _generalized_classes_categorical(weights_df, studentized_contrast_threshold)\n weights_df = _generalized_weights_categorical(weights_df, masked_deposit_array)\n elif weights_type == \"ascending\" or weights_type == \"descending\":\n weights_df = _generalized_classes_cumulative(weights_df, studentized_contrast_threshold)\n weights_df = _generalized_weights_cumulative(weights_df, masked_deposit_array)\n\n # 5. Generate arrays for desired metrics\n arrays_dict = _generate_arrays_from_metrics(evidence_array, weights_df, metrics_to_arrays)\n\n # Return nr. of deposit pixels and nr. of all evidence pixels to be used in calculating responses\n nr_of_deposits = int(np.sum(masked_deposit_array == 1))\n nr_of_pixels = int(np.size(masked_evidence_array))\n\n return weights_df, arrays_dict, raster_meta, nr_of_deposits, nr_of_pixels\n
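A minimal usage sketch (the file paths are hypothetical placeholders). The returned arrays, nr_of_deposits and nr_of_pixels can be passed on to weights_of_evidence_calculate_responses:

import geopandas as gpd
import rasterio
from eis_toolkit.prediction.weights_of_evidence import weights_of_evidence_calculate_weights

deposits = gpd.read_file("deposits.gpkg")  # hypothetical deposit point data
with rasterio.open("evidence.tif") as evidential_raster:  # hypothetical evidence raster
    weights_df, arrays, raster_meta, nr_of_deposits, nr_of_pixels = weights_of_evidence_calculate_weights(
        evidential_raster, deposits, weights_type="ascending", studentized_contrast_threshold=2
    )
print(weights_df)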
"},{"location":"raster_processing/check_raster_grids/","title":"Check raster grids","text":""},{"location":"raster_processing/check_raster_grids/#eis_toolkit.raster_processing.check_raster_grids.check_raster_grids","title":"check_raster_grids(rasters, same_extent=False)
","text":"Check the set of input rasters for matching gridding and optionally matching bounds.
Parameters:
Name Type Description Default rasters
List[DatasetReader]
List of rasters to test for matching gridding.
required same_extent
bool
Optional boolean argument that determines whether rasters are tested for matching bounds. Defaults to False.
False
Returns:
Type Description bool
True if gridding (and optionally bounds) matches, False if not.
Source code in eis_toolkit/raster_processing/check_raster_grids.py
def check_raster_grids( # type: ignore[no-any-unimported]\n rasters: List[rasterio.io.DatasetReader], same_extent: bool = False\n) -> bool:\n \"\"\"\n Check the set of input rasters for matching gridding and optionally matching bounds.\n\n Args:\n rasters: List of rasters to test for matching gridding.\n same_extent: Optional boolean argument that determines whether rasters are tested for matching bounds.\n Defaults to False.\n\n Returns:\n True if gridding (and optionally bounds) matches, False if not.\n \"\"\"\n check = _check_raster_grids(rasters=rasters, same_extent=same_extent)\n return check\n
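A minimal usage sketch (hypothetical file paths):

import rasterio
from eis_toolkit.raster_processing.check_raster_grids import check_raster_grids

with rasterio.open("dem.tif") as r1, rasterio.open("magnetics.tif") as r2:
    grids_match = check_raster_grids([r1, r2], same_extent=True)
print(grids_match)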
"},{"location":"raster_processing/clipping/","title":"Clipping","text":""},{"location":"raster_processing/clipping/#eis_toolkit.raster_processing.clipping.clip_raster","title":"clip_raster(raster, geodataframe)
","text":"Clips a raster with polygon geometries.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be clipped.
required geodataframe
GeoDataFrame
A geodataframe containing the geometries to do the clipping with. Should contain only polygon features.
required Returns:
Type Description ndarray
The clipped raster data.
dict
The updated metadata.
Raises:
Type Description NonMatchingCrsException
The raster and geodataframe are not in the same CRS.
NotApplicableGeometryTypeException
The input geometries contain non-polygon features.
Source code in eis_toolkit/raster_processing/clipping.py
@beartype\ndef clip_raster(raster: rasterio.io.DatasetReader, geodataframe: geopandas.GeoDataFrame) -> Tuple[np.ndarray, dict]:\n \"\"\"Clips a raster with polygon geometries.\n\n Args:\n raster: The raster to be clipped.\n geodataframe: A geodataframe containing the geometries to do the clipping with.\n Should contain only polygon features.\n\n Returns:\n The clipped raster data.\n The updated metadata.\n\n Raises:\n NonMatchingCrsException: The raster and geodataframe are not in the same CRS.\n NotApplicableGeometryTypeException: The input geometries contain non-polygon features.\n \"\"\"\n geometries = geodataframe[\"geometry\"]\n\n if not check_matching_crs(\n objects=[raster, geometries],\n ):\n raise NonMatchingCrsException(\"The raster and geodataframe are not in the same CRS.\")\n\n if not check_geometry_types(\n geometries=geometries,\n allowed_types=[\"Polygon\", \"MultiPolygon\"],\n ):\n raise NotApplicableGeometryTypeException(\"The input geometries contain non-polygon features.\")\n\n out_image, out_meta = _clip_raster(\n raster=raster,\n geometries=geometries,\n )\n\n return out_image, out_meta\n
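A minimal usage sketch that clips a raster and writes the result (hypothetical file paths):

import geopandas as gpd
import rasterio
from eis_toolkit.raster_processing.clipping import clip_raster

polygons = gpd.read_file("areas.gpkg")  # polygon features only
with rasterio.open("raster.tif") as raster:
    out_image, out_meta = clip_raster(raster, polygons)

with rasterio.open("clipped.tif", "w", **out_meta) as dst:
    dst.write(out_image)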
"},{"location":"raster_processing/create_constant_raster/","title":"Create constant raster","text":""},{"location":"raster_processing/create_constant_raster/#eis_toolkit.raster_processing.create_constant_raster.create_constant_raster","title":"create_constant_raster(constant_value, template_raster=None, coord_west=None, coord_north=None, coord_east=None, coord_south=None, target_epsg=None, target_pixel_size=None, raster_width=None, raster_height=None, nodata_value=None)
","text":"Create a constant raster based on a user-defined value.
Provides 3 methods for raster creation: 1. Set extent and coordinate system based on a template raster. 2. Set extent from origin, based on the western and northern coordinates and the pixel size. 3. Set extent from bounds, based on western, northern, eastern and southern points.
Always provide values for height and width for the last two options, which correspond to the desired number of pixels for rows and columns.
Parameters:
Name Type Description Default constant_value
Number
The constant value to use in the raster.
required template_raster
Optional[DatasetReader]
An optional raster to use as a template for the output.
None
coord_west
Optional[Number]
The western coordinate of the output raster in [m].
None
coord_east
Optional[Number]
The eastern coordinate of the output raster in [m].
None
coord_south
Optional[Number]
The southern coordinate of the output raster in [m].
None
coord_north
Optional[Number]
The northern coordinate of the output raster in [m].
None
target_epsg
Optional[int]
The EPSG code for the output raster.
None
target_pixel_size
Optional[int]
The pixel size of the output raster.
None
raster_width
Optional[int]
The width of the output raster.
None
raster_height
Optional[int]
The height of the output raster.
None
nodata_value
Optional[Number]
The nodata value of the output raster.
None
Returns:
Type Description Tuple[ndarray, dict]
A tuple containing the output raster as a NumPy array and updated metadata.
Raises:
Type Description InvalidParameterValueException
Invalid input parameter values were provided.
Source code in eis_toolkit/raster_processing/create_constant_raster.py
@beartype\ndef create_constant_raster( # type: ignore[no-any-unimported]\n constant_value: Number,\n template_raster: Optional[rasterio.io.DatasetReader] = None,\n coord_west: Optional[Number] = None,\n coord_north: Optional[Number] = None,\n coord_east: Optional[Number] = None,\n coord_south: Optional[Number] = None,\n target_epsg: Optional[int] = None,\n target_pixel_size: Optional[int] = None,\n raster_width: Optional[int] = None,\n raster_height: Optional[int] = None,\n nodata_value: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Create a constant raster based on a user-defined value.\n\n Provides 3 methods for raster creation:\n 1. Set extent and coordinate system based on a template raster.\n 2. Set extent from origin, based on the western and northern coordinates and the pixel size.\n 3. Set extent from bounds, based on western, northern, eastern and southern points.\n\n Always provide values for height and width for the last two options, which correspond to\n the desired number of pixels for rows and columns.\n\n Args:\n constant_value: The constant value to use in the raster.\n template_raster: An optional raster to use as a template for the output.\n coord_west: The western coordinate of the output raster in [m].\n coord_east: The eastern coordinate of the output raster in [m].\n coord_south: The southern coordinate of the output raster in [m].\n coord_north: The northern coordinate of the output raster in [m].\n target_epsg: The EPSG code for the output raster.\n target_pixel_size: The pixel size of the output raster.\n raster_width: The width of the output raster.\n raster_height: The height of the output raster.\n nodata_value: The nodata value of the output raster.\n\n Returns:\n A tuple containing the output raster as a NumPy array and updated metadata.\n\n Raises:\n InvalidParameterValueException: Invalid input parameter values were provided.\n \"\"\"\n\n if template_raster is not None:\n out_array, out_meta = _create_constant_raster_from_template(constant_value, template_raster, nodata_value)\n\n elif all(coords is not None for coords in [coord_west, coord_east, coord_south, coord_north]):\n if raster_height <= 0 or raster_width <= 0:\n raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n if not check_minmax_position((coord_west, coord_east)) or not check_minmax_position((coord_south, coord_north)):\n raise InvalidParameterValueException(\"Invalid coordinate values provided.\")\n\n out_array, out_meta = _create_constant_raster_from_bounds(\n constant_value,\n coord_west,\n coord_north,\n coord_east,\n coord_south,\n target_epsg,\n raster_width,\n raster_height,\n nodata_value,\n )\n\n elif all(coords is not None for coords in [coord_west, coord_north]) and all(\n coords is None for coords in [coord_east, coord_south]\n ):\n if raster_height <= 0 or raster_width <= 0:\n raise InvalidParameterValueException(\"Invalid raster extent provided.\")\n if target_pixel_size <= 0:\n raise InvalidParameterValueException(\"Invalid pixel size.\")\n\n out_array, out_meta = _create_constant_raster_from_origin(\n constant_value,\n coord_west,\n coord_north,\n target_epsg,\n target_pixel_size,\n raster_width,\n raster_height,\n nodata_value,\n )\n\n else:\n raise InvalidParameterValueException(\"Suitable parameter values were not provided for any of the 3 methods.\")\n\n constant_value = cast_scalar_to_int(constant_value)\n nodata_value = cast_scalar_to_int(out_meta[\"nodata\"])\n\n if isinstance(constant_value, int) and isinstance(nodata_value, int):\n target_dtype = np.result_type(get_min_int_type(constant_value), get_min_int_type(nodata_value))\n out_array = out_array.astype(target_dtype)\n out_meta[\"dtype\"] = out_array.dtype\n elif isinstance(constant_value, int) and isinstance(nodata_value, float):\n out_array = out_array.astype(get_min_int_type(constant_value))\n out_meta[\"dtype\"] = np.float64.__name__\n elif isinstance(constant_value, float):\n out_array = out_array.astype(np.float64)\n out_meta[\"dtype\"] = out_array.dtype\n\n return out_array, out_meta\n
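A minimal usage sketch for method 2, extent from origin (hypothetical coordinates in EPSG:3067):

from eis_toolkit.raster_processing.create_constant_raster import create_constant_raster

out_array, out_meta = create_constant_raster(
    constant_value=1,
    coord_west=384744.0,
    coord_north=6671384.0,
    target_epsg=3067,
    target_pixel_size=25,
    raster_width=100,
    raster_height=100,
    nodata_value=-999,
)
print(out_array.shape, out_meta["dtype"])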
"},{"location":"raster_processing/extract_values_from_raster/","title":"Extract values from raster","text":""},{"location":"raster_processing/extract_values_from_raster/#eis_toolkit.raster_processing.extract_values_from_raster.extract_values_from_raster","title":"extract_values_from_raster(raster_list, geodataframe, raster_column_names=None)
","text":"Extract raster values using point data to a DataFrame.
If custom column names are not given, column names are file_name for singleband files and file_name_bandnumber for multiband files. If custom column names are given, there should be column names for each raster provided in the raster list.
Parameters:
Name Type Description Default raster_list
Sequence[DatasetReader]
List of rasters to extract values from.
required geodataframe
GeoDataFrame
Point data to extract values with.
required raster_column_names
Optional[Sequence[str]]
List of optional column names for bands.
None
Returns:
Type Description DataFrame
Dataframe with x & y coordinates and the values from the raster file(s) as columns.
Raises:
Type Description NonMatchingParameterLengthsException
raster_list and raster_columns_names have different lengths.
Source code in eis_toolkit/raster_processing/extract_values_from_raster.py
@beartype\ndef extract_values_from_raster(\n raster_list: Sequence[rasterio.io.DatasetReader],\n geodataframe: gpd.GeoDataFrame,\n raster_column_names: Optional[Sequence[str]] = None,\n) -> pd.DataFrame:\n \"\"\"Extract raster values using point data to a DataFrame.\n\n If custom column names are not given, column names are file_name for singleband files\n and file_name_bandnumber for multiband files. If custom column names are given, there\n should be column names for each raster provided in the raster list.\n\n Args:\n raster_list: List of rasters to extract values from.\n geodataframe: Point data to extract values with.\n raster_column_names: List of optional column names for bands.\n\n Returns:\n Dataframe with x & y coordinates and the values from the raster file(s) as columns.\n\n Raises:\n NonMatchingParameterLengthsException: raster_list and raster_columns_names have different lengths.\n \"\"\"\n if raster_column_names == []:\n raster_column_names = None\n\n if raster_column_names is not None and len(raster_list) != len(raster_column_names):\n raise NonMatchingParameterLengthsException(\"Raster list and raster columns names have different lengths.\")\n\n data_frame = _extract_values_from_raster(\n raster_list=raster_list, geodataframe=geodataframe, raster_column_names=raster_column_names\n )\n\n return data_frame\n
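A minimal usage sketch (hypothetical file paths):

import geopandas as gpd
import rasterio
from eis_toolkit.raster_processing.extract_values_from_raster import extract_values_from_raster

points = gpd.read_file("samples.gpkg")  # hypothetical point data
with rasterio.open("evidence.tif") as raster:  # hypothetical raster
    df = extract_values_from_raster([raster], points, raster_column_names=["evidence"])
print(df.head())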
"},{"location":"raster_processing/reprojecting/","title":"Reprojecting","text":""},{"location":"raster_processing/reprojecting/#eis_toolkit.raster_processing.reprojecting.reproject_raster","title":"reproject_raster(raster, target_crs, resampling_method=warp.Resampling.nearest)
","text":"Reprojects raster to match given coordinate reference system (EPSG).
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be reprojected.
required target_crs
int
Target CRS as EPSG code.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.
nearest
Returns:
Type Description ndarray
The reprojected raster data.
dict
The updated metadata.
Raises:
Type Description MatchingCrsException
Raster is already in the target CRS.
Source code in eis_toolkit/raster_processing/reprojecting.py
@beartype\ndef reproject_raster(\n raster: rasterio.io.DatasetReader, target_crs: int, resampling_method: warp.Resampling = warp.Resampling.nearest\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Reprojects raster to match given coordinate reference system (EPSG).\n\n Args:\n raster: The raster to be reprojected.\n target_crs: Target CRS as EPSG code.\n resampling_method: Resampling method. Most suitable method depends on the dataset and context.\n Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.\n\n Returns:\n The reprojected raster data.\n The updated metadata.\n\n Raises:\n MatchingCrsException: Raster is already in the target CRS.\n \"\"\"\n if target_crs == int(raster.crs.to_string()[5:]):\n raise MatchingCrsException(\"Raster is already in the target CRS.\")\n\n out_image, out_meta = _reproject_raster(raster, target_crs, resampling_method)\n\n return out_image, out_meta\n
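A minimal usage sketch (hypothetical file path):

import rasterio
from eis_toolkit.raster_processing.reprojecting import reproject_raster

with rasterio.open("raster.tif") as raster:
    out_image, out_meta = reproject_raster(raster, target_crs=4326)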
"},{"location":"raster_processing/resampling/","title":"Resampling","text":""},{"location":"raster_processing/resampling/#eis_toolkit.raster_processing.resampling.resample","title":"resample(raster, resolution, resampling_method=Resampling.bilinear)
","text":"Resamples raster according to given resolution.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be resampled.
required resolution
Number
Target resolution i.e. cell size of the output raster.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to bilinear.
bilinear
Returns:
Type Description ndarray
The resampled raster data.
dict
The updated metadata.
Raises:
Type Description NumericValueSignException
Resolution is not a positive value.
Source code in eis_toolkit/raster_processing/resampling.py
@beartype\ndef resample(\n raster: rasterio.io.DatasetReader,\n resolution: Number,\n resampling_method: Resampling = Resampling.bilinear,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Resamples raster according to given resolution.\n\n Args:\n raster: The raster to be resampled.\n resolution: Target resolution i.e. cell size of the output raster.\n resampling_method: Resampling method. Most suitable\n method depends on the dataset and context. Nearest, bilinear and cubic are some\n common choices. This parameter defaults to bilinear.\n\n Returns:\n The resampled raster data.\n The updated metadata.\n\n Raises:\n NumericValueSignException: Resolution is not a positive value.\n \"\"\"\n if resolution <= 0:\n raise exceptions.NumericValueSignException(f\"Expected a positive value for resolution: {resolution}\")\n\n out_image, out_meta = _resample(raster, resolution, resampling_method)\n return out_image, out_meta\n
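A minimal usage sketch (hypothetical file path):

import rasterio
from rasterio.enums import Resampling
from eis_toolkit.raster_processing.resampling import resample

with rasterio.open("raster.tif") as raster:
    out_image, out_meta = resample(raster, resolution=50, resampling_method=Resampling.cubic)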
"},{"location":"raster_processing/snapping/","title":"Snapping","text":""},{"location":"raster_processing/snapping/#eis_toolkit.raster_processing.snapping.snap_with_raster","title":"snap_with_raster(raster, snap_raster)
","text":"Snaps/aligns raster to given snap raster.
The raster is snapped from its left-bottom corner to the nearest snap raster grid corner in the left-bottom direction. If the rasters are already aligned, the input raster data and metadata are returned as-is.
Parameters:
Name Type Description Default raster
DatasetReader
The raster to be snapped.
required snap_raster
DatasetReader
The snap raster i.e. reference grid raster.
required Returns:
Type Description ndarray
The snapped raster data.
dict
The updated metadata.
Raises:
Type Description NonMatchingCrsException
Raster and snap raster are not in the same CRS.
MatchingRasterGridException
Raster grids are already aligned.
Source code in eis_toolkit/raster_processing/snapping.py
@beartype\ndef snap_with_raster(raster: rasterio.DatasetReader, snap_raster: rasterio.DatasetReader) -> Tuple[np.ndarray, dict]:\n \"\"\"Snaps/aligns raster to given snap raster.\n\n Raster is snapped from its left-bottom corner to nearest snap raster grid corner in left-bottom direction.\n If rasters are aligned, simply returns input raster data and metadata.\n\n Args:\n raster: The raster to be snapped.\n snap_raster: The snap raster i.e. reference grid raster.\n\n Returns:\n The snapped raster data.\n The updated metadata.\n\n Raises:\n NonMatchingCrsException: Raster and snap raster are not in the same CRS.\n MatchingRasterGridException: Raster grids are already aligned.\n \"\"\"\n\n if not check_matching_crs(\n objects=[raster, snap_raster],\n ):\n raise NonMatchingCrsException(\"Raster and snap raster have different CRS.\")\n\n if snap_raster.bounds.bottom == raster.bounds.bottom and snap_raster.bounds.left == raster.bounds.left:\n raise MatchingRasterGridException(\"Raster grids are already aligned.\")\n\n out_image, out_meta = _snap(raster, snap_raster)\n return out_image, out_meta\n
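Example (a minimal sketch; both file names are placeholders and the rasters must share a CRS):
```python
import rasterio

from eis_toolkit.raster_processing.snapping import snap_with_raster

# Align a hypothetical evidence raster to a reference grid raster.
with rasterio.open("evidence.tif") as raster, rasterio.open("reference_grid.tif") as snap_raster:
    out_image, out_meta = snap_with_raster(raster, snap_raster)
```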
"},{"location":"raster_processing/unifying/","title":"Unifying","text":""},{"location":"raster_processing/unifying/#eis_toolkit.raster_processing.unifying.unify_raster_grids","title":"unify_raster_grids(base_raster, rasters_to_unify, resampling_method=Resampling.nearest, same_extent=False)
","text":"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.
Parameters:
Name Type Description Default base_raster
DatasetReader
The base raster to determine target raster grid properties.
required rasters_to_unify
Sequence[DatasetReader]
Rasters to be unified with the base raster.
required resampling_method
Resampling
Resampling method. Most suitable method depends on the dataset and context. Nearest, bilinear and cubic are some common choices. This parameter defaults to nearest.
nearest
same_extent
bool
Whether the unified rasters will be forced to have the same extent/bounds as the base raster. Expands smaller rasters with nodata cells. Defaults to False.
False
Returns:
Type Description List[Tuple[ndarray, dict]]
List of unified rasters' data and metadata. First element is the base raster.
Raises:
Type Description InvalidParameterValueException
Rasters to unify is empty.
Source code in eis_toolkit/raster_processing/unifying.py
@beartype\ndef unify_raster_grids(\n base_raster: rasterio.io.DatasetReader,\n rasters_to_unify: Sequence[rasterio.io.DatasetReader],\n resampling_method: Resampling = Resampling.nearest,\n same_extent: bool = False,\n) -> List[Tuple[np.ndarray, dict]]:\n \"\"\"Unifies (reprojects, resamples, aligns and optionally clips) given rasters relative to base raster.\n\n Args:\n base_raster: The base raster to determine target raster grid properties.\n rasters_to_unify: Rasters to be unified with the base raster.\n resampling_method: Resampling method. Most suitable\n method depends on the dataset and context. Nearest, bilinear and cubic are some\n common choices. This parameter defaults to nearest.\n same_extent: If the unified rasters will be forced to have the same extent/bounds\n as the base raster. Expands smaller rasters with nodata cells. Defaults to False.\n\n Returns:\n List of unified rasters' data and metadata. First element is the base raster.\n\n Raises:\n InvalidParameterValueException: Rasters to unify is empty.\n \"\"\"\n if len(rasters_to_unify) == 0:\n raise InvalidParameterValueException(\"Rasters to unify is empty.\")\n\n out_rasters = _unify_raster_grids(base_raster, rasters_to_unify, resampling_method, same_extent)\n return out_rasters\n
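Example (a minimal sketch; the file names are placeholders):
```python
import rasterio

from eis_toolkit.raster_processing.unifying import unify_raster_grids

# Hypothetical inputs; all rasters are unified to the grid of base.tif.
base_raster = rasterio.open("base.tif")
rasters_to_unify = [rasterio.open("gravity.tif"), rasterio.open("magnetics.tif")]

unified = unify_raster_grids(base_raster, rasters_to_unify, same_extent=True)
base_image, base_meta = unified[0]  # the first element is the base raster itself
```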
"},{"location":"raster_processing/unique_combinations/","title":"Unique combinations in rasters","text":""},{"location":"raster_processing/unique_combinations/#eis_toolkit.raster_processing.unique_combinations.unique_combinations","title":"unique_combinations(raster_list)
","text":"Get combinations of raster values between rasters.
All bands in all rasters are used for analysis. The first band of the first raster is used for reference when making the output.
Parameters:
Name Type Description Default raster_list
Sequence[DatasetReader]
Rasters to be used for finding combinations.
required Returns:
Name Type Description out_image
ndarray
Combinations of rasters.
out_meta
dict
The metadata of the first raster in raster_list.
Raises:
Type Description InvalidParameterValueException
Only one band in total is provided, or the raster grids do not match.
Source code in eis_toolkit/raster_processing/unique_combinations.py
@beartype\ndef unique_combinations( # type: ignore[no-any-unimported]\n raster_list: Sequence[rasterio.io.DatasetReader],\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Get combinations of raster values between rasters.\n\n All bands in all rasters are used for analysis.\n The first band of the first raster is used for reference when making the output.\n\n Args:\n raster_list: Rasters to be used for finding combinations.\n\n Returns:\n out_image: Combinations of rasters.\n out_meta: The metadata of the first raster in raster_list.\n\n Raises:\n InvalidParameterValueException: Only one band in total is provided or the raster grids do not match.\n \"\"\"\n bands = []\n out_meta = raster_list[0].meta\n out_meta[\"count\"] = 1\n\n for raster in raster_list:\n for band in range(1, raster.count + 1):\n bands.append(raster.read(band))\n\n if len(bands) == 1:\n raise InvalidParameterValueException(\"Expected to have more bands than 1\")\n\n if check_raster_grids(raster_list) is not True:\n raise InvalidParameterValueException(\"Expected raster grids to be of same shape\")\n\n out_image = _unique_combinations(bands)\n return out_image, out_meta\n
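Example (a minimal sketch; the categorical rasters named below are placeholders and must share the same grid, with more than one band in total):
```python
import rasterio

from eis_toolkit.raster_processing.unique_combinations import unique_combinations

# Hypothetical categorical rasters on the same grid.
rasters = [rasterio.open("lithology.tif"), rasterio.open("soil_type.tif")]
out_image, out_meta = unique_combinations(rasters)
```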
"},{"location":"raster_processing/windowing/","title":"Windowing","text":""},{"location":"raster_processing/windowing/#eis_toolkit.raster_processing.windowing.extract_window","title":"extract_window(raster, center_coords, height, width)
","text":"Extract window from raster.
Center coordinate must be inside the raster, but the window can extend outside the raster, in which case padding with the raster nodata value is used.
Parameters:
Name Type Description Default raster
DatasetReader
Source raster.
required center_coords
Tuple[Number, Number]
Center coordinates for the window in the form (x, y). The coordinates should be in the raster's CRS.
required height
int
Window height in pixels.
required width
int
Window width in pixels.
required
Returns:
Type Description ndarray
The extracted raster window.
dict
The updated metadata.
Raises:
Type Description InvalidParameterValueException
Window size is too small.
CoordinatesOutOfBoundsException
Window center coordinates are out of raster bounds.
Source code in eis_toolkit/raster_processing/windowing.py
@beartype\ndef extract_window(\n raster: rasterio.io.DatasetReader,\n center_coords: Tuple[Number, Number],\n height: int,\n width: int,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Extract window from raster.\n\n Center coordinate must be inside the raster but window can extend outside the raster in which case padding with\n raster nodata value is used.\n\n Args:\n raster: Source raster.\n center_coords: Center coordinates for window in form (x, y). The coordinates should be in the raster's CRS.\n height: Window height in pixels.\n width: Window width in pixels.\n\n Returns:\n The extracted raster window.\n The updated metadata.\n\n Raises:\n InvalidParameterValueException: Window size is too small.\n CoordinatesOutOfBoundsException: Window center coordinates are out of raster bounds.\n \"\"\"\n\n if height < 1 or width < 1:\n raise InvalidParameterValueException(f\"Window size is too small: {height}, {width}.\")\n\n center_x = center_coords[0]\n center_y = center_coords[1]\n\n if (\n center_x < raster.bounds.left\n or center_x > raster.bounds.right\n or center_y < raster.bounds.bottom\n or center_y > raster.bounds.top\n ):\n raise CoordinatesOutOfBoundsException(\"Window center coordinates are out of raster bounds.\")\n\n out_image, out_meta = _extract_window(raster, center_coords, height, width)\n\n return out_image, out_meta\n
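Example (a minimal sketch; the file name and coordinates are placeholders, and the center must fall inside the raster bounds in the raster's CRS):
```python
import rasterio

from eis_toolkit.raster_processing.windowing import extract_window

# Extract a 9 x 9 pixel window around a hypothetical point of interest.
with rasterio.open("dem.tif") as raster:
    out_image, out_meta = extract_window(raster, center_coords=(384750.0, 6671400.0), height=9, width=9)
```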
"},{"location":"training_data_tools/class_balancing/","title":"Class balancing","text":""},{"location":"training_data_tools/class_balancing/#eis_toolkit.training_data_tools.class_balancing.balance_SMOTETomek","title":"balance_SMOTETomek(X, y, sampling_strategy='auto', random_state=None)
","text":"Balances the classes of input dataset using SMOTETomek resampling method.
Parameters:
Name Type Description Default X
Union[DataFrame, ndarray]
The feature matrix (input data as a DataFrame).
required y
Union[Series, ndarray]
The target labels corresponding to the feature matrix.
required sampling_strategy
Union[float, str, dict]
Parameter controlling how to perform the resampling. If float, specifies the ratio of samples in minority class to samples of majority class, if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"), if dict, the keys should be targeted classes and values the desired number of samples for the class. Defaults to \"auto\", which will resample all classes except the majority class.
'auto'
random_state
Optional[int]
Parameter controlling randomization of the algorithm. Can be given a seed (number). Defaults to None, which randomizes the seed.
None
Returns:
Type Description tuple[Union[DataFrame, ndarray], Union[Series, ndarray]]
Resampled feature matrix and target labels.
Raises:
Type Description NonMatchingParameterLengthsException
If X and y have different lengths.
Source code in eis_toolkit/training_data_tools/class_balancing.py
@beartype\ndef balance_SMOTETomek(\n X: Union[pd.DataFrame, np.ndarray],\n y: Union[pd.Series, np.ndarray],\n sampling_strategy: Union[float, str, dict] = \"auto\",\n random_state: Optional[int] = None,\n) -> tuple[Union[pd.DataFrame, np.ndarray], Union[pd.Series, np.ndarray]]:\n \"\"\"Balances the classes of input dataset using SMOTETomek resampling method.\n\n Args:\n X: The feature matrix (input data as a DataFrame).\n y: The target labels corresponding to the feature matrix.\n sampling_strategy: Parameter controlling how to perform the resampling.\n If float, specifies the ratio of samples in minority class to samples of majority class,\n if str, specifies classes to be resampled (\"minority\", \"not minority\", \"not majority\", \"all\", \"auto\"),\n if dict, the keys should be targeted classes and values the desired number of samples for the class.\n Defaults to \"auto\", which will resample all classes except the majority class.\n random_state: Parameter controlling randomization of the algorithm. Can be given a seed (number).\n Defaults to None, which randomizes the seed.\n\n Returns:\n Resampled feature matrix and target labels.\n\n Raises:\n NonMatchingParameterLengthsException: If X and y have different length.\n \"\"\"\n\n if len(X) != len(y):\n raise exceptions.NonMatchingParameterLengthsException(\n \"Feature matrix X and target labels y must have the same length.\"\n )\n\n X_res, y_res = SMOTETomek(sampling_strategy=sampling_strategy, random_state=random_state).fit_resample(X, y)\n return X_res, y_res\n
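Example with small synthetic data (the class counts and seed are arbitrary choices for illustration):
```python
import numpy as np

from eis_toolkit.training_data_tools.class_balancing import balance_SMOTETomek

# Synthetic, deliberately imbalanced data: 90 negatives, 10 positives.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = np.array([0] * 90 + [1] * 10)

X_res, y_res = balance_SMOTETomek(X, y, random_state=0)
print(np.bincount(y_res))  # the classes are now (approximately) balanced
```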
"},{"location":"transformations/binarize/","title":"Binarize","text":""},{"location":"transformations/binarize/#eis_toolkit.transformations.binarize.binarize","title":"binarize(raster, bands=None, thresholds=[Number], nodata=None)
","text":"Binarize data based on a given threshold.
Replaces values less than or equal to the threshold with 0 and values greater than the threshold with 1.
Takes one nodata value which will be re-written after transformation.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The threshold can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
thresholds
Sequence[Number]
Threshold values for transformation.
[Number]
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
Source code in eis_toolkit/transformations/binarize.py
@beartype\ndef binarize( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n thresholds: Sequence[Number] = [Number],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Binarize data based on a given threshold.\n\n Replaces values less than or equal to the threshold with 0.\n Replaces values greater than the threshold with 1.\n\n Takes one nodata value which will be re-written after transformation.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The threshold can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n thresholds: Threshold values for transformation.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = cast_scalar_to_int(raster.nodata if nodata is None else nodata)\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection.\")\n\n if check_parameter_length(bands, thresholds) is False:\n raise NonMatchingParameterLengthsException(\"Invalid threshold length.\")\n\n expanded_args = expand_and_zip(bands, thresholds)\n thresholds = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n inital_dtype = band_array.dtype\n\n band_mask = np.isin(band_array, nodata)\n band_array = _binarize(band_array, threshold=thresholds[i])\n band_array = np.where(band_mask, nodata, band_array)\n\n if not check_dtype_for_int(nodata):\n band_array = band_array.astype(inital_dtype)\n else:\n band_array = band_array.astype(np.min_scalar_type(nodata))\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"threshold\": thresholds[i],\n \"nodata\": nodata,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
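Example (a minimal sketch; the file name and threshold values are placeholders for a raster with at least two bands):
```python
import rasterio

from eis_toolkit.transformations.binarize import binarize

# One threshold per selected band.
with rasterio.open("geochem.tif") as raster:
    out_array, out_meta, out_settings = binarize(raster, bands=[1, 2], thresholds=[0.5, 100])
```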
"},{"location":"transformations/clip/","title":"Clip","text":""},{"location":"transformations/clip/#eis_toolkit.transformations.clip.clip_transform","title":"clip_transform(raster, limits, bands=None, nodata=None)
","text":"Clips data based on specified upper and lower limits.
Takes one nodata value that will be ignored in calculations. Replaces values below the lower limit and above the upper limit with the provided values, respectively. Works both one-sided and two-sided but raises an error if no limits are provided.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The limits can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
limits
Sequence[Tuple[Optional[Number], Optional[Number]]]
Lower and upper limits (lower, upper) as real values.
required nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/clip.py
@beartype\ndef clip_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n limits: Sequence[Tuple[Optional[Number], Optional[Number]]],\n bands: Optional[Sequence[int]] = None,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Clips data based on specified upper and lower limits.\n\n Takes one nodata value that will be ignored in calculations.\n Replaces values below the lower limit and above the upper limit with provided values, respectively.\n Works both one-sided and two-sided but raises error if no limits provided.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The limits can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n limits: Lower and upper limits (lower, upper) as real values.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values).\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, limits) is False:\n raise NonMatchingParameterLengthsException(\"Invalid limit length.\")\n\n for item in limits:\n if item.count(None) == len(item):\n raise InvalidParameterValueException(f\"Limit values all None: {item}.\")\n\n if not check_minmax_position(item):\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n expanded_args = expand_and_zip(bands, limits)\n limits = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n inital_dtype = band_array.dtype\n\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = nodata_to_nan(band_array, nodata_value=nodata)\n\n band_array = _clip_transform(band_array, limits=limits[i])\n\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=inital_dtype)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"limit_lower\": cast_scalar_to_int(limits[i][0]),\n \"limit_upper\": cast_scalar_to_int(limits[i][1]),\n \"nodata\": cast_scalar_to_int(nodata),\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
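Example (a minimal sketch; the file name and limit values are placeholders):
```python
import rasterio

from eis_toolkit.transformations.clip import clip_transform

with rasterio.open("geochem.tif") as raster:
    # Two-sided clipping to [0, 100] for all bands.
    out_array, out_meta, out_settings = clip_transform(raster, limits=[(0, 100)])
    # One-sided clipping: only an upper limit, lower side left open.
    out_array, out_meta, out_settings = clip_transform(raster, limits=[(None, 100)])
```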
"},{"location":"transformations/linear/","title":"Linear","text":""},{"location":"transformations/linear/#eis_toolkit.transformations.linear.min_max_scaling","title":"min_max_scaling(raster, bands=None, new_range=[(0, 1)], nodata=None)
","text":"Normalize data based on a specified new range.
Uses the provided new minimum and maximum to transform data into the new interval. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. The new_range can be set for each band individually. If a parameter contains only 1 entry, it will be applied for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
new_range
Sequence[Tuple[Number, Number]]
The new interval data will be transformed into. First value corresponds to min, second to max.
[(0, 1)]
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values).
Source code in eis_toolkit/transformations/linear.py
@beartype\ndef min_max_scaling( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n new_range: Sequence[Tuple[Number, Number]] = [(0, 1)],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Normalize data based on a specified new range.\n\n Uses the provided new minimum and maximum to transform data into the new interval.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n The new_range can be set for each band individually.\n If a parameter contains only 1 entry, it will be applied for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n new_range: The new interval data will be transformed into. First value corresponds to min, second to max.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values).\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, new_range) is False:\n raise NonMatchingParameterLengthsException(\"Invalid new_range length\")\n\n for item in new_range:\n if not check_minmax_position(item):\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}\")\n\n expanded_args = expand_and_zip(bands, new_range)\n new_range = [element[1] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array = _min_max_scaling(band_array.astype(np.float64), new_range=new_range[i])\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"scaled_min\": new_range[i][0],\n \"scaled_max\": new_range[i][1],\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
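Example (a minimal sketch; the file name and ranges are placeholders for a raster with at least two bands):
```python
import rasterio

from eis_toolkit.transformations.linear import min_max_scaling

# Band 1 is scaled to [0, 1] and band 2 to [-1, 1].
with rasterio.open("evidence.tif") as raster:
    out_array, out_meta, out_settings = min_max_scaling(raster, bands=[1, 2], new_range=[(0, 1), (-1, 1)])
```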
"},{"location":"transformations/linear/#eis_toolkit.transformations.linear.z_score_normalization","title":"z_score_normalization(raster, bands=None, nodata=None)
","text":"Normalize data based on mean and standard deviation.
Results will have a mean = 0 and standard deviation = 1. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
Source code in eis_toolkit/transformations/linear.py
@beartype\ndef z_score_normalization( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Normalize data based on mean and standard deviation.\n\n Results will have a mean = 0 and standard deviation = 1.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection.\")\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array, mean_array, sd_array = _z_score_normalization(band_array.astype(np.float64))\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"original_mean\": truncate_decimal_places(mean_array, decimal_places=out_decimals),\n \"original_sd\": truncate_decimal_places(sd_array, decimal_places=out_decimals),\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
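Example (a minimal sketch; the file name is a placeholder):
```python
import rasterio

from eis_toolkit.transformations.linear import z_score_normalization

with rasterio.open("evidence.tif") as raster:
    out_array, out_meta, out_settings = z_score_normalization(raster)

# The original band statistics are logged per band in the settings dictionary.
print(out_settings["transformation 1"]["original_mean"])
```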
"},{"location":"transformations/logarithmic/","title":"Logarithmic","text":""},{"location":"transformations/logarithmic/#eis_toolkit.transformations.logarithmic.log_transform","title":"log_transform(raster, bands=None, log_transform=['log2'], nodata=None)
","text":"Perform a logarithmic transformation on the provided data.
Takes one nodata value that will be ignored in calculations. Negative values and zeros will not be considered for transformation and are replaced by the specified nodata value.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The log_transform can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
log_transform
Sequence[str]
The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.
['log2']
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands
InvalidParameterValueException
The input does not match the requirements (values, order of values)
Source code in eis_toolkit/transformations/logarithmic.py
@beartype\ndef log_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n log_transform: Sequence[str] = [\"log2\"],\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Perform a logarithmic transformation on the provided data.\n\n Takes one nodata value that will be ignored in calculations.\n Negative values and zeros will not be considered for transformation and are replaced by the specified nodata value.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The log_transform can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n log_transform: The base for logarithmic transformation. Valid values 'ln', 'log2' and 'log10'.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands\n InvalidParameterValueException: The input does not match the requirements (values, order of values)\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, log_transform) is False:\n raise NonMatchingParameterLengthsException(\"Invalid length for log-base values.\")\n\n for item in log_transform:\n if not (item == \"ln\" or item == \"log2\" or item == \"log10\"):\n raise InvalidParameterValueException(f\"Invalid method: {item}.\")\n\n expanded_args = expand_and_zip(bands, log_transform)\n log_transform = [element[1] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n band_array[band_array <= 0] = np.nan\n\n if log_transform[i] == \"ln\":\n band_array = _log_transform_ln(band_array.astype(np.float64))\n elif log_transform[i] == \"log2\":\n band_array = _log_transform_log2(band_array.astype(np.float64))\n elif log_transform[i] == \"log10\":\n band_array = _log_transform_log10(band_array.astype(np.float64))\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"log_transform\": log_transform[i],\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
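Example (a minimal sketch; the file name is a placeholder, and a single base given applies to all bands):
```python
import rasterio

from eis_toolkit.transformations.logarithmic import log_transform

with rasterio.open("geochem.tif") as raster:
    out_array, out_meta, out_settings = log_transform(raster, log_transform=["log10"])
```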
"},{"location":"transformations/one_hot_encoding/","title":"One-hot encoding","text":""},{"location":"transformations/one_hot_encoding/#eis_toolkit.transformations.one_hot_encoding.one_hot_encode","title":"one_hot_encode(data, columns=None, drop_original_columns=True, drop_category=None, sparse_output=True, out_dtype=int, handle_unknown='infrequent_if_exist', min_frequency=None, max_categories=None)
","text":"Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.
This function converts categorical variables into a form that could be provided to machine learning algorithms for better prediction. For each unique category in the feature, a new binary column is created.
Continuous data should not be given to this function to avoid excessive amounts of binary features. If input is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.
The function allows control over aspects like handling unknown categories, controlling sparsity of the output, and setting data type of the encoded columns.
Parameters:
Name Type Description Default data
Union[DataFrame, ndarray]
Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be restricted to specified columns.
required columns
Optional[Sequence[str]]
Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.
None
drop_original_columns
bool
If True and 'data' is a DataFrame, the original columns being encoded will be dropped from the output. Defaults to True.
True
drop_category
Optional[Literal[first, if_binary]]
Specifies a method to drop one of the categories to avoid multicollinearity. 'first' drops the first category, 'if_binary' drops one category only if the feature is binary. If None, no category is dropped. Defaults to None.
None
sparse_output
bool
Determines whether the output matrix is sparse or dense. Defaults to True (sparse).
True
out_dtype
Union[type, dtype]
Numeric data type of the output. Defaults to int.
int
handle_unknown
Literal[error, ignore, infrequent_if_exist]
Specifies how to handle unknown categories encountered during transform. 'error' raises an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent. Defaults to 'infrequent_if_exist'.
'infrequent_if_exist'
min_frequency
Optional[Number]
The minimum frequency (as a float or an int) needed to include a category in encoding. Optional parameter. Defaults to None.
None
max_categories
Optional[int]
The maximum number of categories to include in encoding. Optional parameter. Defaults to None.
None
Returns:
Type Description Union[DataFrame, ndarray, csr_matrix]
Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse) if input was a Numpy array.
Raises:
Type Description EmptyDataFrameException
If the input DataFrame is empty.
InvalidDatasetException
If the input Numpy array is empty.
InvalidColumnException
If any specified column to encode does not exist in the input DataFrame.
Source code in eis_toolkit/transformations/one_hot_encoding.py
@beartype\ndef one_hot_encode(\n data: Union[pd.DataFrame, np.ndarray],\n columns: Optional[Sequence[str]] = None,\n drop_original_columns: bool = True,\n drop_category: Optional[Literal[\"first\", \"if_binary\"]] = None,\n sparse_output: bool = True,\n out_dtype: Union[type, np.dtype] = int,\n handle_unknown: Literal[\"error\", \"ignore\", \"infrequent_if_exist\"] = \"infrequent_if_exist\",\n min_frequency: Optional[Number] = None,\n max_categories: Optional[int] = None,\n) -> Union[pd.DataFrame, np.ndarray, sparse._csr.csr_matrix]:\n \"\"\"\n Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.\n\n This function converts categorical variables into a form that could be provided to machine learning\n algorithms for better prediction. For each unique category in the feature, a new binary column is created.\n\n Continuous data should not be given to this function to avoid excessive amounts of binary features. If input\n is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.\n\n The function allows control over aspects like handling unknown categories, controlling sparsity of the output,\n and setting data type of the encoded columns.\n\n Args:\n data: Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be\n restricted to specified columns.\n columns: Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are\n considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.\n drop_original_columns: If True and 'data' is a DataFrame, the original columns being encoded will\n be dropped from the output. Defaults to True.\n drop_category: Specifies a method to drop one of the categories to avoid multicollinearity.\n 'first' drops the first category, 'if_binary' drops one category only if the feature is binary.\n If None, no category is dropped. Defaults to None.\n sparse_output: Determines whether the output matrix is sparse or dense. Defaults to True (sparse).\n out_dtype: Numeric data type of the output. Defaults to int.\n handle_unknown: Specifies how to handle unknown categories encountered during transform. 'error' raises\n an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent.\n Defaults to 'infrequent_if_exist'.\n min_frequency: The minimum frequency (as a float or an int) needed to include a category in encoding.\n Optional parameter. Defaults to None.\n max_categories: The maximum number of categories to include in encoding. 
Optional parameter.\n Defaults to None.\n\n Returns:\n Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse)\n if input was a Numpy array.\n\n Raises:\n EmptyDataFrameException: If the input DataFrame is empty.\n InvalidDatasetException: If the input Numpy array is empty.\n InvalidColumnException: If any specified column to encode does not exist in the input DataFrame.\n \"\"\"\n is_dataframe = isinstance(data, pd.DataFrame)\n\n if is_dataframe:\n if data.empty:\n raise exceptions.EmptyDataFrameException(\"Input DataFrame is empty.\")\n df = data.copy()\n\n if columns is not None:\n if not check_columns_valid(df, columns):\n raise exceptions.InvalidColumnException(\"All selected columns were not found in the input DataFrame.\")\n transform_df = df[columns]\n else:\n transform_df = df\n else:\n if data.size == 0:\n raise exceptions.InvalidDatasetException(\"Input array is empty.\")\n transform_df = pd.DataFrame(data)\n\n encoder = OneHotEncoder(\n drop=drop_category,\n sparse_output=sparse_output,\n dtype=out_dtype,\n handle_unknown=handle_unknown,\n min_frequency=min_frequency,\n max_categories=max_categories,\n feature_name_combiner=lambda feature, category: str(feature) + \"_\" + str(category),\n )\n\n # Transform selected columns\n encoded_data = encoder.fit_transform(transform_df)\n encoded_cols = encoder.get_feature_names_out(transform_df.columns)\n\n # If input was a DataFrame, create output DataFrame\n if is_dataframe:\n if sparse_output:\n encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_data, columns=encoded_cols, index=df.index)\n else:\n encoded_df = pd.DataFrame(encoded_data, columns=encoded_cols, index=df.index)\n\n if drop_original_columns:\n df = df.drop(transform_df.columns, axis=1)\n\n encoded_data = pd.concat([df, encoded_df], axis=1)\n\n return encoded_data\n
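Example with a small synthetic table (column names and values are illustrative only; only the categorical column is encoded):
```python
import pandas as pd

from eis_toolkit.transformations.one_hot_encoding import one_hot_encode

df = pd.DataFrame(
    {"lithology": ["granite", "basalt", "granite", "gneiss"], "au_ppm": [0.1, 0.5, 0.2, 0.3]}
)
encoded = one_hot_encode(df, columns=["lithology"], sparse_output=False)
# e.g. ['au_ppm', 'lithology_basalt', 'lithology_gneiss', 'lithology_granite']
print(encoded.columns.tolist())
```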
"},{"location":"transformations/sigmoid/","title":"Sigmoid","text":""},{"location":"transformations/sigmoid/#eis_toolkit.transformations.sigmoid.sigmoid_transform","title":"sigmoid_transform(raster, bands=None, bounds=[(0, 1)], slope=[1], center=True, nodata=None)
","text":"Transform data into a sigmoid-shape based on a specified new range.
Uses the provided new minimum and maximum, shift and slope parameters to transform the data. Takes one nodata value that will be ignored in calculations.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The bounds and slope values can be set for each band individually.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
bounds
Sequence[Tuple[Number, Number]]
Boundaries for the calculation of the sigmoid function (lower, upper).
[(0, 1)]
slope
Sequence[Number]
Value which modifies the slope of the resulting sigmoid-curve.
[1]
center
bool
Center array values around mean = 0 before sigmoid transformation.
True
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values)
Source code in eis_toolkit/transformations/sigmoid.py
@beartype\ndef sigmoid_transform( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n bands: Optional[Sequence[int]] = None,\n bounds: Sequence[Tuple[Number, Number]] = [(0, 1)],\n slope: Sequence[Number] = [1],\n center: bool = True,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Transform data into a sigmoid-shape based on a specified new range.\n\n Uses the provided new minimum and maximum, shift and slope parameters to transform the data.\n Takes one nodata value that will be ignored in calculations.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The bounds and slope values can be set for each band individually.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n bounds: Boundaries for the calculation of the sigmoid function (lower, upper).\n slope: Value which modifies the slope of the resulting sigmoid-curve.\n center: Center array values around mean = 0 before sigmoid transformation.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values)\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n for parameter_name, parameter in [(\"bounds\", bounds), (\"slope\", slope)]:\n if check_parameter_length(bands, parameter) is False:\n raise NonMatchingParameterLengthsException(f\"Invalid length for {parameter_name}.\")\n\n for item in bounds:\n if check_minmax_position(item) is False:\n raise InvalidParameterValueException(f\"Invalid min-max values provided: {item}.\")\n\n expanded_args = expand_and_zip(bands, bounds, slope)\n bounds = [element[1] for element in expanded_args]\n slope = [element[2] for element in expanded_args]\n\n out_settings = {}\n out_decimals = set_max_precision()\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)\n\n band_array = _sigmoid_transform(band_array.astype(np.float64), bounds=bounds[i], slope=slope[i], center=center)\n\n band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"bound_lower\": truncate_decimal_places(bounds[i][0], decimal_places=out_decimals),\n \"bound_upper\": truncate_decimal_places(bounds[i][1], decimal_places=out_decimals),\n \"slope\": slope[i],\n \"center\": center,\n \"nodata\": nodata,\n \"decimal_places\": out_decimals,\n 
}\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
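Example (a minimal sketch; the file name and parameter values are placeholders):
```python
import rasterio

from eis_toolkit.transformations.sigmoid import sigmoid_transform

# Values are centered around mean = 0, then squeezed into (0, 1) with a steeper slope.
with rasterio.open("evidence.tif") as raster:
    out_array, out_meta, out_settings = sigmoid_transform(raster, bounds=[(0, 1)], slope=[2], center=True)
```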
"},{"location":"transformations/winsorize/","title":"Winsorize","text":""},{"location":"transformations/winsorize/#eis_toolkit.transformations.winsorize.winsorize","title":"winsorize(raster, percentiles, bands=None, inside=False, nodata=None)
","text":"Winsorize data based on specified percentile values.
Takes one nodata value that will be ignored in calculations. Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided. Works both one-sided and two-sided but raises an error if no percentile values are provided.
Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%] and percentile_upper = 10 corresponds to the interval [90%, max]. Hence percentile_lower = 0 refers to the data minimum and percentile_upper = 0 to the data maximum.
The calculation of percentile values can be ambiguous. Users can choose whether the replacement value is taken from inside or outside of the respective interval. Example: given np.array([5, 10, 12, 15, 20, 24, 27, 30, 35]) and percentiles (10, 10), the calculated percentiles are (5, 35) for inside and (10, 30) for outside. This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.
If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The percentiles can be set for each band individually, but the inside parameter is the same for all bands.
Parameters:
Name Type Description Default raster
DatasetReader
Data object to be transformed.
required bands
Optional[Sequence[int]]
Selection of bands to be transformed.
None
percentiles
Sequence[Tuple[Optional[Number], Optional[Number]]]
Lower and upper percentile values (lower, upper) between [0, 100].
required inside
bool
Whether to use the value for replacement from the left or right of the calculated percentile.
False
nodata
Optional[Number]
Nodata value to be considered.
None
Returns:
Name Type Description out_array
ndarray
The transformed data.
out_meta
dict
Updated metadata.
out_settings
dict
Log of input settings and calculated statistics if available.
Raises:
Type Description InvalidRasterBandException
The input contains invalid band numbers.
NonMatchingParameterLengthsException
The input does not match the number of selected bands.
InvalidParameterValueException
The input does not match the requirements (values, order of values)
Source code in eis_toolkit/transformations/winsorize.py
@beartype\ndef winsorize( # type: ignore[no-any-unimported]\n raster: rasterio.io.DatasetReader,\n percentiles: Sequence[Tuple[Optional[Number], Optional[Number]]],\n bands: Optional[Sequence[int]] = None,\n inside: bool = False,\n nodata: Optional[Number] = None,\n) -> Tuple[np.ndarray, dict, dict]:\n \"\"\"\n Winsorize data based on specified percentile values.\n\n Takes one nodata value that will be ignored in calculations.\n Replaces values between [minimum, lower percentile] and [upper percentile, maximum] if provided.\n Works both one-sided and two-sided but raises error if no percentile values provided.\n\n Percentiles are symmetrical, i.e. percentile_lower = 10 corresponds to the interval [min, 10%].\n And percentile_upper = 10 corresponds to the interval [90%, max].\n I.e. percentile_lower = 0 refers to the minimum and percentile_upper = 0 to the data maximum.\n\n Calculation of percentiles is ambiguous. Users can choose whether to use the value\n for replacement from inside or outside of the respective interval. Example:\n Given the np.array[5 10 12 15 20 24 27 30 35] and percentiles(10, 10), the calculated\n percentiles are (5, 35) for inside and (10, 30) for outside.\n This results in [5 10 12 15 20 24 27 30 35] and [10 10 12 15 20 24 27 30 30], respectively.\n\n If no band/column selection specified, all bands/columns will be used.\n If a parameter contains only 1 entry, it will be applied for all bands.\n The percentiles can be set for each band individually, but the inside parameter is the same for all bands.\n\n Args:\n raster: Data object to be transformed.\n bands: Selection of bands to be transformed.\n percentiles: Lower and upper percentile values (lower, upper) between [0, 100].\n inside: Whether to use the value for replacement from the left or right of the calculated percentile.\n nodata: Nodata value to be considered.\n\n Returns:\n out_array: The transformed data.\n out_meta: Updated metadata.\n out_settings: Log of input settings and calculated statistics if available.\n\n Raises:\n InvalidRasterBandException: The input contains invalid band numbers.\n NonMatchingParameterLengthsException: The input does not match the number of selected bands.\n InvalidParameterValueException: The input does not match the requirements (values, order of values)\n \"\"\"\n bands = list(range(1, raster.count + 1)) if bands is None else bands\n nodata = raster.nodata if nodata is None else nodata\n\n if check_raster_bands(raster, bands) is False:\n raise InvalidRasterBandException(\"Invalid band selection\")\n\n if check_parameter_length(bands, percentiles) is False:\n raise NonMatchingParameterLengthsException(\"Invalid length for percentiles.\")\n\n for item in percentiles:\n if item.count(None) == len(item):\n raise InvalidParameterValueException(f\"Percentile values all None: {item}.\")\n\n if None not in item and sum(item) >= 100:\n raise InvalidParameterValueException(f\"Sum >= 100: {item}.\")\n\n if item[0] is not None and not (0 < item[0] < 100):\n raise InvalidParameterValueException(f\"Invalid lower percentile value: {item}.\")\n\n if item[1] is not None and not (0 < item[1] < 100):\n raise InvalidParameterValueException(f\"Invalid upper percentile value: {item}.\")\n\n expanded_args = expand_and_zip(bands, percentiles)\n percentiles = [element[1] for element in expanded_args]\n\n out_settings = {}\n\n for i in range(0, len(bands)):\n band_array = raster.read(bands[i])\n inital_dtype = band_array.dtype\n\n band_array = cast_array_to_float(band_array, cast_int=True)\n band_array = nodata_to_nan(band_array, nodata_value=nodata)\n\n band_array, calculated_lower, calculated_upper = _winsorize(\n band_array, percentiles=percentiles[i], inside=inside\n )\n\n band_array = nan_to_nodata(band_array, nodata_value=nodata)\n band_array = cast_array_to_int(band_array, scalar=nodata, initial_dtype=inital_dtype)\n\n band_array = np.expand_dims(band_array, axis=0)\n\n if i == 0:\n out_array = band_array.copy()\n else:\n out_array = np.vstack((out_array, band_array))\n\n current_transform = f\"transformation {i + 1}\"\n current_settings = {\n \"band_origin\": bands[i],\n \"percentile_lower\": cast_scalar_to_int(percentiles[i][0]),\n \"percentile_upper\": cast_scalar_to_int(percentiles[i][1]),\n \"calculated_lower\": cast_scalar_to_int(calculated_lower),\n \"calculated_upper\": cast_scalar_to_int(calculated_upper),\n \"nodata\": cast_scalar_to_int(nodata),\n }\n\n out_settings[current_transform] = current_settings\n\n out_meta = raster.meta.copy()\n out_meta.update({\"count\": len(bands), \"nodata\": nodata, \"dtype\": out_array.dtype.name})\n\n return out_array, out_meta, out_settings\n
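Example (a minimal sketch; the file name and percentile values are placeholders):
```python
import rasterio

from eis_toolkit.transformations.winsorize import winsorize

with rasterio.open("geochem.tif") as raster:
    # Two-sided winsorizing at the 10th percentiles from both ends.
    out_array, out_meta, out_settings = winsorize(raster, percentiles=[(10, 10)], inside=False)
    # One-sided: treat only the upper 5 % tail.
    out_array, out_meta, out_settings = winsorize(raster, percentiles=[(None, 5)])
```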
"},{"location":"validation/calculate_auc/","title":"Calculate AUC","text":""},{"location":"validation/calculate_auc/#eis_toolkit.validation.calculate_auc.calculate_auc","title":"calculate_auc(x_values, y_values)
","text":"Calculate area under curve (AUC).
Calculates AUC for curve. X-axis should be either proportion of area or false positive rate. Y-axis should always be true positive rate. AUC is calculated with sklearn.metrics.auc which uses the trapezoidal rule for calculation.
Parameters:
Name Type Description Default x_values
Union[ndarray, Series]
Either proportion of area or false positive rate values.
required y_values
Union[ndarray, Series]
True positive rate values.
required Returns:
Type Description float
The area under curve.
Raises:
Type Description InvalidParameterValueException
x_values or y_values are out of bounds.
Source code in eis_toolkit/validation/calculate_auc.py
@beartype\ndef calculate_auc(x_values: Union[np.ndarray, pd.Series], y_values: Union[np.ndarray, pd.Series]) -> float:\n \"\"\"Calculate area under curve (AUC).\n\n Calculates AUC for curve. X-axis should be either proportion of area or false positive rate. Y-axis should\n always be true positive rate. AUC is calculated with sklearn.metrics.auc which uses the trapezoidal rule for calculation.\n\n Args:\n x_values: Either proportion of area or false positive rate values.\n y_values: True positive rate values.\n\n Returns:\n The area under curve.\n\n Raises:\n InvalidParameterValueException: x_values or y_values are out of bounds.\n \"\"\"\n if x_values.max() > 1 or x_values.min() < 0:\n raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n if y_values.max() > 1 or y_values.min() < 0:\n raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n auc_value = _calculate_auc(x_values=x_values, y_values=y_values)\n return auc_value\n
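Example with synthetic curve points (the values are illustrative only; both axes must lie within [0, 1]):
```python
import numpy as np

from eis_toolkit.validation.calculate_auc import calculate_auc

# x = proportion of area, y = true positive rate, as points on a success-rate curve.
x_values = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
y_values = np.array([0.0, 0.6, 0.8, 0.9, 1.0])
print(calculate_auc(x_values, y_values))  # trapezoidal-rule AUC, here 0.7
```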
"},{"location":"validation/calculate_base_metrics/","title":"Calculate base metrics","text":""},{"location":"validation/calculate_base_metrics/#eis_toolkit.validation.calculate_base_metrics.calculate_base_metrics","title":"calculate_base_metrics(raster, deposits, band=1, negatives=None)
","text":"Calculate true positive rate, proportion of area and false positive rate values for different thresholds.
The function calculates true positive rate, proportion of area and false positive rate values for different thresholds, which are determined from the input deposit locations and the mineral prospectivity map. Note that the calculation of false positive rate is optional and is only done if negative point locations are provided.
Parameters:
Name Type Description Default raster
DatasetReader
Mineral prospectivity map or evidence layer.
required deposits
GeoDataFrame
Mineral deposit locations as points.
required band
int
Band index of the mineral prospectivity map. Defaults to 1.
1
negatives
Optional[GeoDataFrame]
Negative locations as points.
None
Returns:
Type Description DataFrame
DataFrame containing true positive rate, proportion of area, threshold values and false positive rate (optional) values.
Raises:
Type Description NonMatchingCrsException
The raster and point data are not in the same CRS.
NotApplicableGeometryTypeException
The input geometries contain non-point features.
Source code in eis_toolkit/validation/calculate_base_metrics.py
@beartype\ndef calculate_base_metrics(\n raster: rasterio.io.DatasetReader,\n deposits: geopandas.GeoDataFrame,\n band: int = 1,\n negatives: Optional[geopandas.GeoDataFrame] = None,\n) -> pd.DataFrame:\n \"\"\"Calculate true positive rate, proportion of area and false positive rate values for different thresholds.\n\n The function calculates true positive rate, proportion of area and false positive rate values for different\n thresholds, which are determined from the input deposit locations and the mineral prospectivity map. Note that\n the calculation of false positive rate is optional and is only done if negative point locations are provided.\n\n Args:\n raster: Mineral prospectivity map or evidence layer.\n deposits: Mineral deposit locations as points.\n band: Band index of the mineral prospectivity map. Defaults to 1.\n negatives: Negative locations as points.\n\n Returns:\n DataFrame containing true positive rate, proportion of area, threshold values and false positive\n rate (optional) values.\n\n Raises:\n NonMatchingCrsException: The raster and point data are not in the same CRS.\n NotApplicableGeometryTypeException: The input geometries contain non-point features.\n \"\"\"\n if negatives is not None:\n geometries = pd.concat([deposits, negatives]).geometry\n else:\n geometries = deposits[\"geometry\"]\n\n if not check_matching_crs(\n objects=[raster, geometries],\n ):\n raise NonMatchingCrsException(\"The raster and deposits are not in the same CRS.\")\n\n if not check_geometry_types(\n geometries=geometries,\n allowed_types=[\"Point\"],\n ):\n raise NotApplicableGeometryTypeException(\"The input geometries contain non-point features.\")\n\n base_metrics = _calculate_base_metrics(raster=raster, deposits=deposits, band=band, negatives=negatives)\n\n return base_metrics\n
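Example: a minimal sketch; the file paths are illustrative, and the raster and points must share a CRS:
```python
import geopandas as gpd
import rasterio

from eis_toolkit.validation.calculate_base_metrics import calculate_base_metrics

deposits = gpd.read_file("data/deposits.gpkg")  # hypothetical point dataset
with rasterio.open("data/prospectivity_map.tif") as raster:  # hypothetical raster
    base_metrics = calculate_base_metrics(raster=raster, deposits=deposits, band=1)

print(base_metrics.head())  # true positive rate, proportion of area and threshold values
```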
"},{"location":"validation/get_pa_intersection/","title":"Get P-A plot intersection point","text":""},{"location":"validation/get_pa_intersection/#eis_toolkit.validation.get_pa_intersection.get_pa_intersection","title":"get_pa_intersection(true_positive_rate_values, proportion_of_area_values, threshold_values)
","text":"Calculate the intersection point for prediction rate and area curves in (P-A plot).
The threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate for the y-axis. The area curve uses inverted proportion of area as the y-axis.
Parameters:
Name Type Description Default true_positive_rate_values
Union[ndarray, Series]
True positive rate values, values should be within range 0-1.
required proportion_of_area_values
Union[ndarray, Series]
Proportion of area values, values should be within range 0-1.
required threshold_values
Union[ndarray, Series]
Threshold values that were used to calculate true positive rate and proportion of area.
required Returns:
Type Description Tuple[float, float]
X and y coordinates of the intersection point.
Raises:
Type Description InvalidParameterValueException
true_positive_rate_values or proportion_of_area_values values are out of bounds.
Source code in eis_toolkit/validation/get_pa_intersection.py
@beartype\ndef get_pa_intersection(\n true_positive_rate_values: Union[np.ndarray, pd.Series],\n proportion_of_area_values: Union[np.ndarray, pd.Series],\n threshold_values: Union[np.ndarray, pd.Series],\n) -> Tuple[float, float]:\n \"\"\"Calculate the intersection point for the prediction rate and area curves in a prediction-area (P-A) plot.\n\n The threshold values act as the x-axis for both curves. The prediction rate curve uses true positive rate for\n the y-axis. The area curve uses inverted proportion of area as the y-axis.\n\n Args:\n true_positive_rate_values: True positive rate values, values should be within range 0-1.\n proportion_of_area_values: Proportion of area values, values should be within range 0-1.\n threshold_values: Threshold values that were used to calculate true positive rate and proportion of area.\n\n Returns:\n X and y coordinates of the intersection point.\n\n Raises:\n InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.\n \"\"\"\n if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n raise InvalidParameterValueException(\"true_positive_rate_values values should be within range 0-1\")\n\n if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n raise InvalidParameterValueException(\"proportion_of_area_values values should be within range 0-1\")\n\n intersection = _get_pa_intersection(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n )\n\n return intersection.x, intersection.y\n
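Example: a minimal sketch with synthetic, monotonic curves that are guaranteed to intersect:
```python
import numpy as np

from eis_toolkit.validation.get_pa_intersection import get_pa_intersection

threshold_values = np.linspace(1.0, 0.0, 11)
true_positive_rate_values = np.linspace(0.0, 1.0, 11)
proportion_of_area_values = np.linspace(0.0, 1.0, 11) ** 2

x, y = get_pa_intersection(
    true_positive_rate_values=true_positive_rate_values,
    proportion_of_area_values=proportion_of_area_values,
    threshold_values=threshold_values,
)
print(f"Curves intersect at threshold {x:.2f}, rate {y:.2f}")
```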
"},{"location":"validation/plot_correlation_matrix/","title":"Plot correlation matrix","text":""},{"location":"validation/plot_correlation_matrix/#eis_toolkit.validation.plot_correlation_matrix.plot_correlation_matrix","title":"plot_correlation_matrix(matrix, annotate=True, cmap=None, plot_title=None, **kwargs)
","text":"Create a Seaborn heatmap to visualize correlation matrix.
Parameters:
Name Type Description Default matrix
DataFrame
Correlation matrix as a DataFrame.
required annotate
bool
Whether the plot squares should display the correlation values. Defaults to True.
True
cmap
Optional[ListedColormap]
Colormap for plotting. Optional parameter. Defaults to None, in which case a default colormap is used.
None
plot_title
Optional[str]
Title of the plot. Optional parameter, defaults to none (no title).
None
**kwargs
dict
Additional parameters to pass to Seaborn and matplotlib.
{}
Returns:
Type Description Axes
Matplotlib axes object with the produced plot.
Raises:
Type Description EmptyDataFrameException
Input matrix is empty.
Source code in eis_toolkit/validation/plot_correlation_matrix.py
def plot_correlation_matrix(\n matrix: pd.DataFrame,\n annotate: bool = True,\n cmap: Optional[matplotlib.colors.ListedColormap] = None,\n plot_title: Optional[str] = None,\n **kwargs: dict\n) -> matplotlib.axes.Axes:\n \"\"\"\n Create a Seaborn heatmap to visualize a correlation matrix.\n\n Args:\n matrix: Correlation matrix as a DataFrame.\n annotate: Whether the plot squares should display the correlation values. Defaults to True.\n cmap: Colormap for plotting. Optional parameter. Defaults to None, in which\n case a default colormap is used.\n plot_title: Title of the plot. Optional parameter, defaults to none (no title).\n **kwargs: Additional parameters to pass to Seaborn and matplotlib.\n\n Returns:\n Matplotlib axes object with the produced plot.\n\n Raises:\n EmptyDataFrameException: Input matrix is empty.\n \"\"\"\n if matrix.empty:\n raise exceptions.EmptyDataFrameException(\"Input matrix DataFrame is empty.\")\n\n # Mask for the upper triangle of the heatmap\n mask = np.triu(np.ones_like(matrix, dtype=bool))\n\n if cmap is None:\n # Generate a default diverging colormap\n cmap = sns.diverging_palette(230, 20, as_cmap=True)\n\n ax = sns.heatmap(\n matrix,\n mask=mask,\n cmap=cmap,\n vmax=0.3,\n center=0,\n square=True,\n linewidths=0.5,\n annot=annotate,\n cbar_kws={\"shrink\": 0.5},\n **kwargs\n )\n if plot_title is not None:\n ax.set_title(plot_title)\n\n return ax\n
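Example: a minimal sketch using a small synthetic geochemistry table:
```python
import pandas as pd

from eis_toolkit.validation.plot_correlation_matrix import plot_correlation_matrix

df = pd.DataFrame(
    {"Cu": [1.2, 3.4, 2.2, 5.1], "Ni": [0.8, 2.9, 1.7, 4.6], "Co": [2.0, 1.1, 3.3, 0.9]}
)
ax = plot_correlation_matrix(df.corr(), annotate=True, plot_title="Element correlations")
ax.figure.savefig("correlation_matrix.png")  # or plt.show() in an interactive session
```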
"},{"location":"validation/plot_prediction_area_curves/","title":"Plot prediction-area (P-A) curves","text":""},{"location":"validation/plot_prediction_area_curves/#eis_toolkit.validation.plot_prediction_area_curves.plot_prediction_area_curves","title":"plot_prediction_area_curves(true_positive_rate_values, proportion_of_area_values, threshold_values)
","text":"Plot prediction-area (P-A) plot.
Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See e.g., Yousefi and Carranza (2015).
Parameters:
Name Type Description Default true_positive_rate_values
Union[ndarray, Series]
True positive rate values.
required proportion_of_area_values
Union[ndarray, Series]
Proportion of area values.
required threshold_values
Union[ndarray, Series]
Threshold values.
required Returns:
Type Description Figure
P-A plot figure object.
Raises:
Type Description InvalidParameterValueException
true_positive_rate_values or proportion_of_area_values values are out of bounds.
References Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.
Source code in eis_toolkit/validation/plot_prediction_area_curves.py
@beartype\ndef plot_prediction_area_curves(\n true_positive_rate_values: Union[np.ndarray, pd.Series],\n proportion_of_area_values: Union[np.ndarray, pd.Series],\n threshold_values: Union[np.ndarray, pd.Series],\n) -> matplotlib.figure.Figure:\n \"\"\"Plot prediction-area (P-A) plot.\n\n Plots a prediction-area plot that can be used to evaluate mineral prospectivity maps and evidential layers. See\n e.g., Yousefi and Carranza (2015).\n\n Args:\n true_positive_rate_values: True positive rate values.\n proportion_of_area_values: Proportion of area values.\n threshold_values: Threshold values.\n\n Returns:\n P-A plot figure object.\n\n Raises:\n InvalidParameterValueException: true_positive_rate_values or proportion_of_area_values values are out of bounds.\n\n References:\n Yousefi, Mahyar, and Emmanuel John M. Carranza. \"Fuzzification of continuous-value spatial evidence for mineral\n prospectivity mapping.\" Computers & Geosciences 74 (2015): 97-109.\n \"\"\"\n if true_positive_rate_values.max() > 1 or true_positive_rate_values.min() < 0:\n raise InvalidParameterValueException(\"true_positive_rate values should be within range 0-1\")\n\n if proportion_of_area_values.max() > 1 or proportion_of_area_values.min() < 0:\n raise InvalidParameterValueException(\"proportion_of_area values should be within range 0-1\")\n\n fig = _plot_prediction_area_curves(\n true_positive_rate_values=true_positive_rate_values,\n proportion_of_area_values=proportion_of_area_values,\n threshold_values=threshold_values,\n )\n return fig\n
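Example: a minimal sketch with synthetic values; in practice these come from calculate_base_metrics:
```python
import numpy as np

from eis_toolkit.validation.plot_prediction_area_curves import plot_prediction_area_curves

threshold_values = np.linspace(1.0, 0.0, 11)
true_positive_rate_values = np.linspace(0.0, 1.0, 11)
proportion_of_area_values = np.linspace(0.0, 1.0, 11) ** 2

fig = plot_prediction_area_curves(
    true_positive_rate_values=true_positive_rate_values,
    proportion_of_area_values=proportion_of_area_values,
    threshold_values=threshold_values,
)
fig.savefig("pa_plot.png")
```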
"},{"location":"validation/plot_rate_curve/","title":"Plot rate curve","text":""},{"location":"validation/plot_rate_curve/#eis_toolkit.validation.plot_rate_curve.plot_rate_curve","title":"plot_rate_curve(x_values, y_values, plot_type='success_rate')
","text":"Plot success rate, prediction rate or ROC curve.
The plot type depends on the plot_type argument. The y-axis is always true positive rate, while the x-axis can be either false positive rate (roc) or proportion of area (success and prediction rate), depending on the plot type.
Parameters:
Name Type Description Default x_values
Union[ndarray, Series]
False positive rate values or proportion of area values.
required y_values
Union[ndarray, Series]
True positive rate values.
required plot_type
Literal['success_rate', 'prediction_rate', 'roc']
Plot type. Can be either: \"success_rate\", \"prediction_rate\" or \"roc\".
'success_rate'
Returns:
Type Description Figure
Success rate, prediction rate or ROC plot figure object.
Raises:
Type Description InvalidParameterValueException
Invalid plot type.
InvalidParameterValueException
x_values or y_values are out of bounds.
Source code in eis_toolkit/validation/plot_rate_curve.py
@beartype\ndef plot_rate_curve(\n x_values: Union[np.ndarray, pd.Series],\n y_values: Union[np.ndarray, pd.Series],\n plot_type: Literal[\"success_rate\", \"prediction_rate\", \"roc\"] = \"success_rate\",\n) -> matplotlib.figure.Figure:\n \"\"\"Plot success rate, prediction rate or ROC curve.\n\n The plot type depends on the plot_type argument. The y-axis is always true positive rate, while the x-axis can be\n either false positive rate (roc) or proportion of area (success and prediction rate), depending on the plot type.\n\n Args:\n x_values: False positive rate values or proportion of area values.\n y_values: True positive rate values.\n plot_type: Plot type. Can be either: \"success_rate\", \"prediction_rate\" or \"roc\".\n\n Returns:\n Success rate, prediction rate or ROC plot figure object.\n\n Raises:\n InvalidParameterValueException: Invalid plot type.\n InvalidParameterValueException: x_values or y_values are out of bounds.\n \"\"\"\n if plot_type == \"success_rate\":\n label = \"Success rate\"\n xlab = \"Proportion of area\"\n elif plot_type == \"prediction_rate\":\n label = \"Prediction rate\"\n xlab = \"Proportion of area\"\n elif plot_type == \"roc\":\n label = \"ROC\"\n xlab = \"False positive rate\"\n else:\n raise InvalidParameterValueException(\"Invalid plot type\")\n\n if x_values.max() > 1 or x_values.min() < 0:\n raise InvalidParameterValueException(\"x_values should be within range 0-1\")\n\n if y_values.max() > 1 or y_values.min() < 0:\n raise InvalidParameterValueException(\"y_values should be within range 0-1\")\n\n fig = _plot_rate_curve(x_values=x_values, y_values=y_values, label=label, xlab=xlab)\n\n return fig\n
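Example: a minimal sketch plotting a success rate curve from synthetic values:
```python
import numpy as np

from eis_toolkit.validation.plot_rate_curve import plot_rate_curve

x_values = np.array([0.0, 0.2, 0.5, 1.0])  # proportion of area
y_values = np.array([0.0, 0.6, 0.9, 1.0])  # true positive rate

fig = plot_rate_curve(x_values=x_values, y_values=y_values, plot_type="success_rate")
fig.savefig("success_rate.png")
```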
"},{"location":"vector_processing/cell_based_association/","title":"Cell-Based Association","text":""},{"location":"vector_processing/cell_based_association/#eis_toolkit.vector_processing.cell_based_association.cell_based_association","title":"cell_based_association(cell_size, geodata, output_path, column=None, subset_target_attribute_values=None, add_name=None, add_buffer=None)
","text":"Creation of CBA matrix.
Initializes a CBA matrix from a vector file. The mesh is calculated according to the geometries contained in this file and the cell size. Multiple vector datasets can be added to the matrix, based on targeted shapes and/or attributes.
Parameters:
Name Type Description Default cell_size
int
Size of the cells.
required geodata
List[GeoDataFrame]
GeoDataFrame used to create the CBA matrix. Additional GeoDataFrame(s) can be provided to add to the CBA matrix.
required output_path
str
Name of the saved .tif file.
required column
Optional[List[str]]
Name of the column of interest. If no attribute is specified, then an artificial attribute is created representing the presence or absence of the geometries of this file for each cell of the CBA grid. A categorical attribute will generate as many (binary) columns in the CBA matrix as there are values considered of interest (dummification). See parameter subset_target_attribute_values. Additional column(s) can be provided for each added GeoDataFrame(s). None
subset_target_attribute_values
Optional[List[Union[None, list, str]]]
List of values of interest of the target attribute, in case a categorical target attribute has been specified. Allows filtering a subset of relevant values. Additional values can be provided for each added GeoDataFrame(s).
None
add_name
Optional[List[Union[str, None]]]
Name of the column(s) to add to the matrix.
None
add_buffer
Optional[List[Union[Number, bool]]]
Allows the use of a buffer around shapes before the intersection with CBA cells for the added GeoDataFrame(s). This can minimize border effects or increase the number of positive samples (i.e. cells with mineralization). The size of the buffer is computed using the CRS (if the projected CRS is in meters, the value is in meters).
None
Returns:
Type Description GeoDataFrame
The created CBA matrix.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Invalid cell size, buffer value, or subset of target attribute values.
InvalidColumnException
Targeted column not found in the GeoDataFrame.
Source code in eis_toolkit/vector_processing/cell_based_association.py
@beartype\ndef cell_based_association(\n cell_size: int,\n geodata: List[gpd.GeoDataFrame],\n output_path: str,\n column: Optional[List[str]] = None,\n subset_target_attribute_values: Optional[List[Union[None, list, str]]] = None,\n add_name: Optional[List[Union[str, None]]] = None,\n add_buffer: Optional[List[Union[Number, bool]]] = None,\n) -> gpd.GeoDataFrame:\n \"\"\"Creation of CBA matrix.\n\n Initializes a CBA matrix from a vector file. The mesh is calculated\n according to the geometries contained in this file and the cell size.\n Multiple vector datasets can be added to the matrix, based on targeted\n shapes and/or attributes.\n\n Args:\n cell_size: Size of the cells.\n geodata: GeoDataFrame used to create the CBA matrix. Additional\n GeoDataFrame(s) can be provided to add to the CBA matrix.\n output_path: Name of the saved .tif file.\n column: Name of the column of interest. If no attribute is specified,\n then an artificial attribute is created representing the presence\n or absence of the geometries of this file for each cell of the CBA\n grid. A categorical attribute will generate as many (binary) columns\n in the CBA matrix as there are values considered of interest\n (dummification). See parameter <subset_target_attribute_values>.\n Additional column(s) can be provided for each added GeoDataFrame(s).\n subset_target_attribute_values: List of values of interest of the\n target attribute, in case a categorical target attribute has been\n specified. Allows filtering a subset of relevant values. Additional\n values can be provided for each added GeoDataFrame(s).\n add_name: Name of the column(s) to add to the matrix.\n add_buffer: Allows the use of a buffer around shapes before the\n intersection with CBA cells for the added GeoDataFrame(s). This can\n minimize border effects or increase the number of positive samples\n (i.e. cells with mineralization). The size of the buffer is computed\n using the CRS (if the projected CRS is in meters, the value is in\n meters).\n\n Returns:\n The created CBA matrix.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: Invalid cell size, buffer value, or subset of target attribute values.\n InvalidColumnException: Targeted column not found in the GeoDataFrame.\n \"\"\"\n\n # Swapping None to list values\n if column is None:\n column = [\"\"]\n if subset_target_attribute_values is None:\n subset_target_attribute_values = [None]\n if add_buffer is None:\n add_buffer = [False]\n\n # Consistency checks on input data\n for frame in geodata:\n if frame.empty:\n raise exceptions.EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if cell_size <= 0:\n raise exceptions.InvalidParameterValueException(\"Expected cell size to be positive and non-zero.\")\n\n add_buffer = [False if x == 0 else x for x in add_buffer]\n if any(num < 0 for num in add_buffer):\n raise exceptions.InvalidParameterValueException(\"Expected buffer value to be positive, null or False.\")\n\n for i, name in enumerate(column):\n if column[i] == \"\":\n if subset_target_attribute_values[i] is not None:\n raise exceptions.InvalidParameterValueException(\"Can't use subset of values if no column is targeted.\")\n elif column[i] not in geodata[i]:\n raise exceptions.InvalidColumnException(\"Targeted column not found in the GeoDataFrame.\")\n\n for i, subset in enumerate(subset_target_attribute_values):\n if subset is not None:\n for value in subset:\n if value not in geodata[i][column[i]].unique():\n raise exceptions.InvalidParameterValueException(\n \"Subset of value(s) not found in the targeted column.\"\n )\n\n # Computation\n for i, data in enumerate(geodata):\n if i == 0:\n # Initialization of the CBA matrix\n grid, cba = _init_from_vector_data(cell_size, geodata[0], column[0], subset_target_attribute_values[0])\n else:\n # If necessary, adding data to matrix\n cba = _add_layer(\n cba,\n grid,\n geodata[i],\n column[i],\n subset_target_attribute_values[i],\n add_name[i - 1],\n add_buffer[i - 1],\n )\n\n # Export\n _to_raster(cba, output_path)\n\n return cba\n
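Example: a minimal sketch; the file path, column name and cell size are illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.cell_based_association import cell_based_association

geology = gpd.read_file("data/geology.gpkg")  # hypothetical polygon dataset
cba = cell_based_association(
    cell_size=5000,  # 5 km cells, assuming a metric CRS
    geodata=[geology],
    output_path="cba_matrix.tif",
    column=["LITHOLOGY"],  # hypothetical categorical column
    subset_target_attribute_values=[None],  # keep all values of the column
)
```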
"},{"location":"vector_processing/distance_computation/","title":"Distance computation","text":""},{"location":"vector_processing/distance_computation/#eis_toolkit.vector_processing.distance_computation.distance_computation","title":"distance_computation(raster_profile, geometries)
","text":"Calculate distance from raster cell to nearest geometry.
Parameters:
Name Type Description Default raster_profile
Union[Profile, dict]
The raster profile of the raster in which the distances to the nearest geometry are determined.
required geometries
GeoDataFrame
The geometries to determine distance to.
required Returns:
Type Description ndarray
A 2D numpy array with the distances computed.
Raises:
Type Description NonMatchingCrsException
The raster profile and geometries are not in the same CRS.
EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
The raster profile does not contain integer width and height or an affine transformation.
Source code in eis_toolkit/vector_processing/distance_computation.py
@beartype\ndef distance_computation(raster_profile: Union[profiles.Profile, dict], geometries: gpd.GeoDataFrame) -> np.ndarray:\n \"\"\"Calculate distance from raster cell to nearest geometry.\n\n Args:\n raster_profile: The raster profile of the raster in which the distances\n to the nearest geometry are determined.\n geometries: The geometries to determine distance to.\n\n Returns:\n A 2D numpy array with the distances computed.\n\n Raises:\n NonMatchingCrsException: The raster profile and geometries are not in the same CRS.\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: The raster profile does not contain integer width and height\n or an affine transformation.\n \"\"\"\n if raster_profile.get(\"crs\") != geometries.crs:\n raise exceptions.NonMatchingCrsException(\"Expected coordinate systems to match between raster and geometries.\")\n if geometries.shape[0] == 0:\n raise exceptions.EmptyDataFrameException(\"Expected GeoDataFrame to not be empty.\")\n\n raster_width = raster_profile.get(\"width\")\n raster_height = raster_profile.get(\"height\")\n\n if not isinstance(raster_width, int) or not isinstance(raster_height, int):\n raise exceptions.InvalidParameterValueException(\n f\"Expected raster_profile to contain integer width and height. {raster_profile}\"\n )\n\n raster_transform = raster_profile.get(\"transform\")\n\n if not isinstance(raster_transform, transform.Affine):\n raise exceptions.InvalidParameterValueException(\n f\"Expected raster_profile to contain an affine transformation. {raster_profile}\"\n )\n\n return _distance_computation(\n raster_width=raster_width, raster_height=raster_height, raster_transform=raster_transform, geometries=geometries\n )\n
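Example: a minimal sketch; the file paths are illustrative and both inputs must share a CRS:
```python
import geopandas as gpd
import rasterio

from eis_toolkit.vector_processing.distance_computation import distance_computation

faults = gpd.read_file("data/faults.gpkg")  # hypothetical line dataset
with rasterio.open("data/base_raster.tif") as raster:  # hypothetical raster
    distances = distance_computation(raster_profile=raster.profile, geometries=faults)

print(distances.shape)  # (height, width) array of distances to the nearest geometry
```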
"},{"location":"vector_processing/extract_shared_lines/","title":"Extract shared lines","text":""},{"location":"vector_processing/extract_shared_lines/#eis_toolkit.vector_processing.extract_shared_lines.extract_shared_lines","title":"extract_shared_lines(polygons)
","text":"Extract shared lines/borders/edges between polygons.
Parameters:
Name Type Description Default polygons
GeoDataFrame
The geodataframe that contains the polygon geometries to be examined for shared lines.
required Returns:
Type Description GeoDataFrame
Geodataframe containing the shared lines that were found between the polygons.
Raises:
Type Description EmptyDataFrameException
The input geodataframe is empty.
InvalidParameterValueException
The input geodataframe doesn't contain at least 2 polygons.
Source code in eis_toolkit/vector_processing/extract_shared_lines.py
@beartype\ndef extract_shared_lines(polygons: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n \"\"\"Extract shared lines/borders/edges between polygons.\n\n Args:\n polygons: The geodataframe that contains the polygon geometries to be examined\n for shared lines.\n\n Returns:\n Geodataframe containing the shared lines that were found between the polygons.\n\n Raises:\n EmptyDataFrameException: The input geodataframe is empty.\n InvalidParameterValueException: The input geodataframe doesn't contain at least 2 polygons.\n \"\"\"\n if polygons.shape[0] == 0:\n raise exceptions.EmptyDataFrameException(\"Geodataframe is empty.\")\n\n if polygons.shape[0] < 2:\n raise exceptions.InvalidParameterValueException(\"Expected GeoDataFrame to have at least 2 polygons.\")\n\n shared_lines = _extract_shared_lines(polygons)\n\n return shared_lines\n
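Example: a minimal sketch with two adjacent squares built in code:
```python
import geopandas as gpd
from shapely.geometry import box

from eis_toolkit.vector_processing.extract_shared_lines import extract_shared_lines

polygons = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)])
shared_lines = extract_shared_lines(polygons)
print(shared_lines.geometry)  # the shared edge between the two squares
```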
"},{"location":"vector_processing/idw_interpolation/","title":"IDW","text":""},{"location":"vector_processing/idw_interpolation/#eis_toolkit.vector_processing.idw_interpolation.idw","title":"idw(geodataframe, target_column, resolution, extent=None, power=2)
","text":"Calculate inverse distance weighted (IDW) interpolation.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be interpolated.
required target_column
str
The column name with values for each geometry.
required resolution
Tuple[Number, Number]
The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).
required extent
Optional[Tuple[Number, Number, Number, Number]]
The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data.
None
power
Number
The value for determining the rate at which the weights decrease. As power increases, the weights for distant points decrease rapidly. Defaults to 2.
2
Returns:
Type Description Tuple[ndarray, dict]
Rasterized vector data and metadata.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Invalid resolution or target_column.
Source code in eis_toolkit/vector_processing/idw_interpolation.py
@beartype\ndef idw(\n geodataframe: gpd.GeoDataFrame,\n target_column: str,\n resolution: Tuple[Number, Number],\n extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n power: Number = 2,\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Calculate inverse distance weighted (IDW) interpolation.\n\n Args:\n geodataframe: The vector dataframe to be interpolated.\n target_column: The column name with values for each geometry.\n resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n If None, calculate extent from the input vector data.\n power: The value for determining the rate at which the weights decrease.\n As power increases, the weights for distant points decrease rapidly.\n Defaults to 2.\n\n Returns:\n Rasterized vector data and metadata.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: Invalid resolution or target_column.\n \"\"\"\n\n if geodataframe.shape[0] == 0:\n raise EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n if target_column not in geodataframe.columns:\n raise InvalidParameterValueException(\n f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n )\n\n if resolution[0] <= 0 or resolution[1] <= 0:\n raise InvalidParameterValueException(\"Expected height and width greater than zero.\")\n\n interpolated_values, out_meta = _idw_interpolation(geodataframe, target_column, resolution, power, extent)\n\n return interpolated_values, out_meta\n
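Example: a minimal sketch; the file path and value column are illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.idw_interpolation import idw

points = gpd.read_file("data/samples.gpkg")  # hypothetical point dataset
out_array, out_meta = idw(
    geodataframe=points,
    target_column="Au_ppm",  # hypothetical value column
    resolution=(25, 25),
    power=2,
)
print(out_array.shape, out_meta)
```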
"},{"location":"vector_processing/kriging_interpolation/","title":"Kriging interpolation","text":""},{"location":"vector_processing/kriging_interpolation/#eis_toolkit.vector_processing.kriging_interpolation.kriging","title":"kriging(data, target_column, resolution, extent=None, variogram_model='linear', coordinates_type='geographic', method='ordinary')
","text":"Perform Kriging interpolation on the input data.
Parameters:
Name Type Description Default data
GeoDataFrame
GeoDataFrame containing the input data.
required target_column
str
The column name with values for each geometry.
required resolution
Tuple[Number, Number]
The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).
required extent
Optional[Tuple[Number, Number, Number, Number]]
The extent of the output raster as (x_min, x_max, y_min, y_max). If None, calculate extent from the input vector data.
None
variogram_model
Literal[linear, power, gaussian, spherical, exponential]
Variogram model to be used. Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.
'linear'
coordinates_type
Literal[euclidean, geographic]
Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic'). Used only in ordinary kriging. Defaults to 'geographic'.
'geographic'
method
Literal[ordinary, universal]
Ordinary or universal kriging. Defaults to 'ordinary'.
'ordinary'
Returns:
Type Description Tuple[ndarray, dict]
Grid containing the interpolated values and metadata.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Target column name is invalid or resolution is not greater than zero.
Source code in eis_toolkit/vector_processing/kriging_interpolation.py
@beartype\ndef kriging(\n data: gpd.GeoDataFrame,\n target_column: str,\n resolution: Tuple[Number, Number],\n extent: Optional[Tuple[Number, Number, Number, Number]] = None,\n variogram_model: Literal[\"linear\", \"power\", \"gaussian\", \"spherical\", \"exponential\"] = \"linear\",\n coordinates_type: Literal[\"euclidean\", \"geographic\"] = \"geographic\",\n method: Literal[\"ordinary\", \"universal\"] = \"ordinary\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"\n Perform Kriging interpolation on the input data.\n\n Args:\n data: GeoDataFrame containing the input data.\n target_column: The column name with values for each geometry.\n resolution: The resolution i.e. cell size of the output raster as (pixel_size_x, pixel_size_y).\n extent: The extent of the output raster as (x_min, x_max, y_min, y_max).\n If None, calculate extent from the input vector data.\n variogram_model: Variogram model to be used.\n Either 'linear', 'power', 'gaussian', 'spherical' or 'exponential'. Defaults to 'linear'.\n coordinates_type: Determines whether coordinates are on a plane ('euclidean') or a sphere ('geographic').\n Used only in ordinary kriging. Defaults to 'geographic'.\n method: Ordinary or universal kriging. Defaults to 'ordinary'.\n\n Returns:\n Grid containing the interpolated values and metadata.\n\n Raises:\n EmptyDataFrameException: The input GeoDataFrame is empty.\n InvalidParameterValueException: Target column name is invalid or resolution is not greater than zero.\n \"\"\"\n\n if data.empty:\n raise EmptyDataFrameException(\"The input GeoDataFrame is empty.\")\n\n if target_column not in data.columns:\n raise InvalidParameterValueException(\n f\"Expected target_column ({target_column}) to be contained in geodataframe columns.\"\n )\n\n if resolution[0] <= 0 or resolution[1] <= 0:\n raise InvalidParameterValueException(\"The resolution must be greater than zero.\")\n\n data_interpolated, out_meta = _kriging(\n data, target_column, resolution, extent, variogram_model, coordinates_type, method\n )\n\n return data_interpolated, out_meta\n
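Example: a minimal sketch; the file path and column name are illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.kriging_interpolation import kriging

points = gpd.read_file("data/samples.gpkg")  # hypothetical point dataset
grid, out_meta = kriging(
    data=points,
    target_column="Au_ppm",  # hypothetical value column
    resolution=(25, 25),
    variogram_model="spherical",
    coordinates_type="euclidean",  # points in a projected (planar) CRS
    method="ordinary",
)
```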
"},{"location":"vector_processing/rasterize_vector/","title":"Rasterize vector","text":""},{"location":"vector_processing/rasterize_vector/#eis_toolkit.vector_processing.rasterize_vector.rasterize_vector","title":"rasterize_vector(geodataframe, resolution=None, value_column=None, default_value=1.0, fill_value=0.0, base_raster_profile=None, buffer_value=None, merge_strategy='replace')
","text":"Transform vector data into raster data.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be rasterized.
required resolution
Optional[float]
The resolution i.e. cell size of the output raster. Optional if base_raster_profile is given.
None
value_column
Optional[str]
The column name with values for each geometry. If None, then default_value is used for all geometries.
None
default_value
float
Default value burned into raster cells based on geometries.
1.0
base_raster_profile
Optional[Union[Profile, dict]]
Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute grid.
None
fill_value
float
Value used outside the burned/rasterized geometry cells.
0.0
buffer_value
Optional[float]
For adding a buffer around passed geometries before rasterization.
None
merge_strategy
Literal[replace, add]
How to handle overlapping geometries. \"add\" causes overlapping geometries to add together the values while \"replace\" does not. Adding them together is the basis for density computations where the density can be calculated by using a default value of 1.0 and the sum in each cell is the count of intersecting geometries.
'replace'
Returns:
Type Description Tuple[ndarray, dict]
Rasterized vector data and metadata.
Raises:
Type Description EmptyDataFrameException
The input GeoDataFrame is empty.
InvalidParameterValueException
Neither resolution nor base_raster_profile is given, value_column is not in the geodataframe, or base_raster_profile is of an invalid type.
NumericValueSignException
Resolution or buffer_value is negative.
Source code in eis_toolkit/vector_processing/rasterize_vector.py
@beartype\ndef rasterize_vector(\n geodataframe: gpd.GeoDataFrame,\n resolution: Optional[float] = None,\n value_column: Optional[str] = None,\n default_value: float = 1.0,\n fill_value: float = 0.0,\n base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n buffer_value: Optional[float] = None,\n merge_strategy: Literal[\"replace\", \"add\"] = \"replace\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Transform vector data into raster data.\n\n Args:\n geodataframe: The vector dataframe to be rasterized.\n resolution: The resolution i.e. cell size of the output raster.\n Optional if base_raster_profile is given.\n value_column: The column name with values for each geometry.\n If None, then default_value is used for all geometries.\n default_value: Default value burned into raster cells based on geometries.\n base_raster_profile: Base raster profile\n to be used for determining the grid on which vectors are\n burned in. If None, the geometries and provided resolution\n value are used to compute grid.\n fill_value: Value used outside the burned/rasterized geometry cells.\n buffer_value: For adding a buffer around passed\n geometries before rasterization.\n merge_strategy: How to handle overlapping geometries.\n \"add\" causes overlapping geometries to add together the\n values while \"replace\" does not. Adding them together is the\n basis for density computations where the density can be\n calculated by using a default value of 1.0 and the sum in\n each cell is the count of intersecting geometries.\n\n Returns:\n Rasterized vector data and metadata.\n \"\"\"\n\n if geodataframe.shape[0] == 0:\n # Empty GeoDataFrame\n raise exceptions.EmptyDataFrameException(\"Expected geodataframe to contain geometries.\")\n\n if resolution is None and base_raster_profile is None:\n raise exceptions.InvalidParameterValueException(\n \"Expected either resolution or base_raster_profile to be given.\"\n )\n if resolution is not None and resolution <= 0:\n raise exceptions.NumericValueSignException(\n f\"Expected a positive value resolution ({dict(resolution=resolution)})\"\n )\n if value_column is not None and value_column not in geodataframe.columns:\n raise exceptions.InvalidParameterValueException(\n f\"Expected value_column ({value_column}) to be contained in geodataframe columns.\"\n )\n if buffer_value is not None and buffer_value < 0:\n raise exceptions.NumericValueSignException(\n f\"Expected a positive buffer_value ({dict(buffer_value=buffer_value)})\"\n )\n\n if base_raster_profile is not None and not isinstance(base_raster_profile, (profiles.Profile, dict)):\n raise exceptions.InvalidParameterValueException(\n f\"Expected base_raster_profile ({type(base_raster_profile)}) to be dict or rasterio.profiles.Profile.\"\n )\n\n if buffer_value is not None:\n geodataframe = geodataframe.copy()\n geodataframe[\"geometry\"] = geodataframe[\"geometry\"].apply(lambda geom: geom.buffer(buffer_value))\n\n return _rasterize_vector(\n geodataframe=geodataframe,\n value_column=value_column,\n default_value=default_value,\n fill_value=fill_value,\n base_raster_profile=base_raster_profile,\n resolution=resolution,\n merge_alg=getattr(MergeAlg, merge_strategy),\n )\n
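Example: a minimal sketch; the file path and values are illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector

lines = gpd.read_file("data/faults.gpkg")  # hypothetical vector dataset
out_array, out_meta = rasterize_vector(
    geodataframe=lines,
    resolution=50.0,
    default_value=1.0,
    fill_value=0.0,
    buffer_value=100.0,  # buffer geometries by 100 units before burning in
    merge_strategy="add",  # overlaps accumulate, which enables density-style counts
)
```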
"},{"location":"vector_processing/reproject_vector/","title":"Reproject vector","text":""},{"location":"vector_processing/reproject_vector/#eis_toolkit.vector_processing.reproject_vector.reproject_vector","title":"reproject_vector(geodataframe, target_crs)
","text":"Reprojects vector data to match given coordinate reference system (EPSG).
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The vector dataframe to be reprojected.
required target_crs
int
Target CRS as an EPSG code.
required Returns:
Type Description GeoDataFrame
Reprojected vector data.
Raises:
Type Description MatchingCrsException
Vector data is already in the target CRS.
Source code in eis_toolkit/vector_processing/reproject_vector.py
@beartype\ndef reproject_vector(geodataframe: geopandas.GeoDataFrame, target_crs: int) -> geopandas.GeoDataFrame:\n \"\"\"Reprojects vector data to match the given coordinate reference system (EPSG).\n\n Args:\n geodataframe: The vector dataframe to be reprojected.\n target_crs: Target CRS as an EPSG code.\n\n Returns:\n Reprojected vector data.\n\n Raises:\n MatchingCrsException: Vector data is already in the target CRS.\n \"\"\"\n\n if geodataframe.crs.to_epsg() == target_crs:\n raise MatchingCrsException(\"Vector data is already in the target CRS.\")\n\n reprojected_gdf = geodataframe.to_crs(\"epsg:\" + str(target_crs))\n return reprojected_gdf\n
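Example: a minimal sketch; the file path is illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.reproject_vector import reproject_vector

gdf = gpd.read_file("data/deposits.gpkg")  # hypothetical input in some other CRS
reprojected = reproject_vector(geodataframe=gdf, target_crs=3067)  # EPSG:3067
print(reprojected.crs)
```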
"},{"location":"vector_processing/vector_density/","title":"Vector density","text":""},{"location":"vector_processing/vector_density/#eis_toolkit.vector_processing.vector_density.vector_density","title":"vector_density(geodataframe, resolution=None, base_raster_profile=None, buffer_value=None, statistic='density')
","text":"Compute density of geometries within raster.
Parameters:
Name Type Description Default geodataframe
GeoDataFrame
The dataframe with vectors of which density is computed.
required resolution
Optional[float]
The resolution i.e. cell size of the output raster. Optional if base_raster_profile is given.
None
base_raster_profile
Optional[Union[Profile, dict]]
Base raster profile to be used for determining the grid on which vectors are burned in. If None, the geometries and provided resolution value are used to compute grid.
None
buffer_value
Optional[float]
For adding a buffer around passed geometries before computing density.
None
statistic
Literal['density', 'count']
Whether to return density or count values. With 'density', the per-cell counts are normalized by the maximum count. Defaults to 'density'.
'density'
Returns:
Type Description Tuple[ndarray, dict]
Computed density of vector data and metadata.
Source code in eis_toolkit/vector_processing/vector_density.py
@beartype\ndef vector_density(\n geodataframe: gpd.GeoDataFrame,\n resolution: Optional[float] = None,\n base_raster_profile: Optional[Union[profiles.Profile, dict]] = None,\n buffer_value: Optional[float] = None,\n statistic: Literal[\"density\", \"count\"] = \"density\",\n) -> Tuple[np.ndarray, dict]:\n \"\"\"Compute density of geometries within raster.\n\n Args:\n geodataframe: The dataframe with vectors\n of which density is computed.\n resolution: The resolution i.e. cell size of the output raster.\n Optional if base_raster_profile is given.\n base_raster_profile: Base raster profile\n to be used for determining the grid on which vectors are\n burned in. If None, the geometries and provided resolution\n value are used to compute grid.\n buffer_value: For adding a buffer around passed\n geometries before computing density.\n statistic: Whether to return density or count values. With \"density\",\n the per-cell counts are normalized by the maximum count.\n Defaults to \"density\".\n\n Returns:\n Computed density of vector data and metadata.\n \"\"\"\n out_raster_array, out_metadata = rasterize_vector(\n geodataframe=geodataframe,\n resolution=resolution,\n base_raster_profile=base_raster_profile,\n buffer_value=buffer_value,\n value_column=None,\n default_value=1.0,\n fill_value=0.0,\n merge_strategy=\"add\",\n )\n max_count = np.max(out_raster_array)\n if statistic == \"count\" or np.isclose(max_count, 0.0):\n return out_raster_array, out_metadata\n else:\n return (out_raster_array / max_count), out_metadata\n
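Example: a minimal sketch; the file path and values are illustrative:
```python
import geopandas as gpd

from eis_toolkit.vector_processing.vector_density import vector_density

deposits = gpd.read_file("data/deposits.gpkg")  # hypothetical point dataset
density_array, out_meta = vector_density(
    geodataframe=deposits,
    resolution=1000.0,
    buffer_value=500.0,  # count each point within a 500-unit radius
    statistic="density",  # per-cell counts normalized by the maximum count
)
```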
"}]}
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 16053387..6812082f 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ
diff --git a/training_data_tools/class_balancing/index.html b/training_data_tools/class_balancing/index.html
index 8bd362f1..4d430afd 100644
--- a/training_data_tools/class_balancing/index.html
+++ b/training_data_tools/class_balancing/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1062,6 +1082,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/transformations/binarize/index.html b/transformations/binarize/index.html
index 7567cc04..62f427eb 100644
--- a/transformations/binarize/index.html
+++ b/transformations/binarize/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1062,6 +1082,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/transformations/clip/index.html b/transformations/clip/index.html
index a60408ff..c9943a4c 100644
--- a/transformations/clip/index.html
+++ b/transformations/clip/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1062,6 +1082,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/transformations/linear/index.html b/transformations/linear/index.html
index 873c0027..219096ae 100644
--- a/transformations/linear/index.html
+++ b/transformations/linear/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1069,6 +1089,26 @@
+ One-hot encoding
@@ -1355,6 +1395,26 @@
+ Extract shared lines
diff --git a/transformations/logarithmic/index.html b/transformations/logarithmic/index.html
index 9f664af9..74251543 100644
--- a/transformations/logarithmic/index.html
+++ b/transformations/logarithmic/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1062,6 +1082,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/transformations/one_hot_encoding/index.html b/transformations/one_hot_encoding/index.html
new file mode 100644
index 00000000..5244f015
--- /dev/null
+++ b/transformations/one_hot_encoding/index.html
@@ -0,0 +1,2094 @@
+ One-hot encoding - EIS Toolkit
+One-hot encoding
Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.
+
This function converts categorical variables into a form that could be provided to machine learning
+algorithms for better prediction. For each unique category in the feature, a new binary column is created.
+
Continuous data should not be given to this function to avoid excessive amounts of binary features. If input
+is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.
+
The function allows control over aspects like handling unknown categories, controlling sparsity of the output,
+and setting data type of the encoded columns.
+Parameters:
+ Name Type Description Default
+ data
+ Union[DataFrame, ndarray]
+ Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be restricted to specified columns.
+ required
+ columns
+ Optional[Sequence[str]]
+ Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.
+ None
+ drop_original_columns
+ bool
+ If True and 'data' is a DataFrame, the original columns being encoded will be dropped from the output. Defaults to True.
+ True
+ drop_category
+ Optional[Literal['first', 'if_binary']]
+ Specifies a method to drop one of the categories to avoid multicollinearity. 'first' drops the first category, 'if_binary' drops one category only if the feature is binary. If None, no category is dropped. Defaults to None.
+ None
+ sparse_output
+ bool
+ Determines whether the output matrix is sparse or dense. Defaults to True (sparse).
+ True
+ out_dtype
+ Union[type, dtype]
+ Numeric data type of the output. Defaults to int.
+ int
+ handle_unknown
+ Literal['error', 'ignore', 'infrequent_if_exist']
+ Specifies how to handle unknown categories encountered during transform. 'error' raises an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent. Defaults to 'infrequent_if_exist'.
+ 'infrequent_if_exist'
+ min_frequency
+ Optional[Number]
+ The minimum frequency (as a float or an int) needed to include a category in encoding. Optional parameter. Defaults to None.
+ None
+ max_categories
+ Optional[int]
+ The maximum number of categories to include in encoding. Optional parameter. Defaults to None.
+ None
+
+Returns:
+ Type Description
+ Union[DataFrame, ndarray, csr_matrix]
+ Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse) if input was a Numpy array.
+
+Raises:
+ Type Description
+ EmptyDataFrameException
+ If the input DataFrame is empty.
+ InvalidDatasetException
+ If the input Numpy array is empty.
+ InvalidColumnException
+ If any specified column to encode does not exist in the input DataFrame.
+
+ Source code in eis_toolkit/transformations/one_hot_encoding.py
+@beartype
+def one_hot_encode(
+    data: Union[pd.DataFrame, np.ndarray],
+    columns: Optional[Sequence[str]] = None,
+    drop_original_columns: bool = True,
+    drop_category: Optional[Literal["first", "if_binary"]] = None,
+    sparse_output: bool = True,
+    out_dtype: Union[type, np.dtype] = int,
+    handle_unknown: Literal["error", "ignore", "infrequent_if_exist"] = "infrequent_if_exist",
+    min_frequency: Optional[Number] = None,
+    max_categories: Optional[int] = None,
+) -> Union[pd.DataFrame, np.ndarray, sparse._csr.csr_matrix]:
+    """
+    Perform one-hot (or one-of-K or dummy) encoding on categorical data in a DataFrame or NumPy array.
+
+    This function converts categorical variables into a form that could be provided to machine learning
+    algorithms for better prediction. For each unique category in the feature, a new binary column is created.
+
+    Continuous data should not be given to this function to avoid excessive amounts of binary features. If input
+    is a DataFrame, continuous data can be excluded from encoding by specifying columns to encode.
+
+    The function allows control over aspects like handling unknown categories, controlling sparsity of the output,
+    and setting data type of the encoded columns.
+
+    Args:
+        data: Input data as a DataFrame or Numpy array. If a DataFrame is provided, the operation can be
+            restricted to specified columns.
+        columns: Specifies the columns to encode if 'data' is a DataFrame. If None, all columns are
+            considered for encoding. Ignored if 'data' is a Numpy array. Defaults to None.
+        drop_original_columns: If True and 'data' is a DataFrame, the original columns being encoded will
+            be dropped from the output. Defaults to True.
+        drop_category: Specifies a method to drop one of the categories to avoid multicollinearity.
+            'first' drops the first category, 'if_binary' drops one category only if the feature is binary.
+            If None, no category is dropped. Defaults to None.
+        sparse_output: Determines whether the output matrix is sparse or dense. Defaults to True (sparse).
+        out_dtype: Numeric data type of the output. Defaults to int.
+        handle_unknown: Specifies how to handle unknown categories encountered during transform. 'error' raises
+            an error, 'ignore' ignores unknown categories, and 'infrequent_if_exist' treats them as infrequent.
+            Defaults to 'infrequent_if_exist'.
+        min_frequency: The minimum frequency (as a float or an int) needed to include a category in encoding.
+            Optional parameter. Defaults to None.
+        max_categories: The maximum number of categories to include in encoding. Optional parameter.
+            Defaults to None.
+
+    Returns:
+        Encoded data as a DataFrame if input was a DataFrame, or as a Numpy array (dense or sparse)
+        if input was a Numpy array.
+
+    Raises:
+        EmptyDataFrameException: If the input DataFrame is empty.
+        InvalidDatasetException: If the input Numpy array is empty.
+        InvalidColumnException: If any specified column to encode does not exist in the input DataFrame.
+    """
+    is_dataframe = isinstance(data, pd.DataFrame)
+
+    if is_dataframe:
+        if data.empty:
+            raise exceptions.EmptyDataFrameException("Input DataFrame is empty.")
+        df = data.copy()
+
+        if columns is not None:
+            if not check_columns_valid(df, columns):
+                raise exceptions.InvalidColumnException("All selected columns were not found in the input DataFrame.")
+            transform_df = df[columns]
+        else:
+            transform_df = df
+    else:
+        if data.size == 0:
+            raise exceptions.InvalidDatasetException("Input array is empty.")
+        transform_df = pd.DataFrame(data)
+
+    encoder = OneHotEncoder(
+        drop=drop_category,
+        sparse_output=sparse_output,
+        dtype=out_dtype,
+        handle_unknown=handle_unknown,
+        min_frequency=min_frequency,
+        max_categories=max_categories,
+        feature_name_combiner=lambda feature, category: str(feature) + "_" + str(category),
+    )
+
+    # Transform selected columns
+    encoded_data = encoder.fit_transform(transform_df)
+    encoded_cols = encoder.get_feature_names_out(transform_df.columns)
+
+    # If input was a DataFrame, create output DataFrame
+    if is_dataframe:
+        if sparse_output:
+            encoded_df = pd.DataFrame.sparse.from_spmatrix(encoded_data, columns=encoded_cols, index=df.index)
+        else:
+            encoded_df = pd.DataFrame(encoded_data, columns=encoded_cols, index=df.index)
+
+        if drop_original_columns:
+            df = df.drop(transform_df.columns, axis=1)
+
+        encoded_data = pd.concat([df, encoded_df], axis=1)
+
+    return encoded_data
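+
+A minimal usage sketch (the DataFrame and its column names below are illustrative):
+
+import pandas as pd
+from eis_toolkit.transformations.one_hot_encoding import one_hot_encode
+
+df = pd.DataFrame({"lithology": ["granite", "gneiss", "granite"], "grade": [1.2, 0.4, 2.1]})
+encoded = one_hot_encode(df, columns=["lithology"], sparse_output=False)
+# -> keeps 'grade' and adds binary columns 'lithology_granite' and 'lithology_gneiss'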
\ No newline at end of file
diff --git a/transformations/sigmoid/index.html b/transformations/sigmoid/index.html
index 5fecb618..fddb053b 100644
--- a/transformations/sigmoid/index.html
+++ b/transformations/sigmoid/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1007,6 +1027,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/transformations/winsorize/index.html b/transformations/winsorize/index.html
index 028a8bdc..92d19ac5 100644
--- a/transformations/winsorize/index.html
+++ b/transformations/winsorize/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1008,6 +1028,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
diff --git a/validation/calculate_auc/index.html b/validation/calculate_auc/index.html
index 9f5bae75..e92314d3 100644
--- a/validation/calculate_auc/index.html
+++ b/validation/calculate_auc/index.html
@@ -800,6 +800,26 @@
+ Unique combinations in rasters
@@ -1006,6 +1026,26 @@
+ One-hot encoding
@@ -1348,6 +1388,26 @@
+ Extract shared lines
@@ -1564,7 +1624,7 @@