diff --git a/src/scripts/LULCC/preparation/Get_SoilGrids.py b/src/scripts/LULCC/preparation/Get_SoilGrids.py
new file mode 100644
index 0000000..78de84f
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Get_SoilGrids.py
@@ -0,0 +1,70 @@
+from owslib.wcs import WebCoverageService
+import rasterio
+import matplotlib.pyplot as plt
+from rasterio import plot
+
+# WCS URLs for different soil properties
+wcs_urls = {
+ 'phh2o': 'http://maps.isric.org/mapserv?map=/map/phh2o.map',
+ 'bdod': 'http://maps.isric.org/mapserv?map=/map/bdod.map',
+ 'cec': 'http://maps.isric.org/mapserv?map=/map/cec.map',
+ 'cfvo': 'https://maps.isric.org/mapserv?map=/map/cfvo.map',
+ 'clay': 'https://maps.isric.org/mapserv?map=/map/clay.map',
+ 'nitrogen': 'https://maps.isric.org/mapserv?map=/map/nitrogen.map',
+ 'sand': 'https://maps.isric.org/mapserv?map=/map/sand.map',
+ 'silt': 'https://maps.isric.org/mapserv?map=/map/silt.map',
+ 'soc': 'https://maps.isric.org/mapserv?map=/map/soc.map',
+<<<<<<< HEAD
+=======
+ 'ocs': 'https://maps.isric.org/mapserv?map=/map/ocs.map',
+>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5
+ 'ocd': 'https://maps.isric.org/mapserv?map=/map/ocd.map'
+
+ # Add other WCS URLs here
+ # ...
+}
+
+# Soil properties
+<<<<<<< HEAD
+soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd']
+=======
+soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd', 'ocs']
+>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5
+
+# Depth intervals
+depth_intervals = ['0-5cm', '5-15cm', '15-30cm', '30-60cm', '60-100cm', '100-200cm']
+
+# A bounding box broadly matching Peru
+bbox = (-81.81, -18.80, -68.15, 0.32)
+
+for property in soil_properties:
+ wcs_url = wcs_urls.get(property, '')
+ if wcs_url:
+ wcs = WebCoverageService(wcs_url, version='1.0.0')
+
+ for depth in depth_intervals:
+ identifier = f'{property}_{depth}_mean'
+ file_name = f'./data/{property}_{depth}_mean.tif'
+ title = f'Mean {property} between {depth} deep in Peru'
+
+ # Fetch the map segment within the bounding box
+ response = wcs.getCoverage(
+ identifier=identifier,
+ crs='urn:ogc:def:crs:EPSG:4326',
+ bbox=bbox,
+ resx=0.002, resy=0.002,
+ format='GEOTIFF_INT16')
+
+ # Fetch the coverage for Peru and save it to disk
+ with open(file_name, 'wb') as file:
+ file.write(response.read())
+ print(f"download complete: {file_name}")
+
+ # Open the file from disk
+ ph = rasterio.open(file_name, driver="GTiff")
+
+ # Use the plot class to plot the map
+ plot.show(ph, title=title, cmap='gist_ncar')
+ plt.show()
+ else:
+ print(f"WCS URL for {property} not defined.")
diff --git a/src/scripts/LULCC/preparation/Rasterize_shapefile.R b/src/scripts/LULCC/preparation/Rasterize_shapefile.R
new file mode 100644
index 0000000..44602f2
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Rasterize_shapefile.R
@@ -0,0 +1,66 @@
+########################################################################
+## Script name: Rasterize_shapefile
+## Purpose of script: rasterize the shapefils of three geographical regions in Peru
+## Author: Chenyu Shen
+## Date Created: 2023-10-27
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd("C:/Work/Peru-env-futures")
+
+# Read in the shapefile
+regions <- vect("src/utils/Geographical_Region/region-geografica.shp")
+
+# Read in the reference raster
+ref_raster <- rast("src/utils/ref_grid.tif")
+
+
+### =========================================================================
+### B - Check and Transform CRS
+### =========================================================================
+
+# Check CRS of both files
+crs_regions <- crs(regions)
+crs_ref <- crs(ref_raster)
+
+# Transform if necessary
+if (crs_regions != crs_ref) {
+ regions <- project(regions, crs_ref)
+}
+
+
+### =========================================================================
+### C - Rasterize the shapefile
+### =========================================================================
+
+# Access and view the attibutes table of the shapefile
+attributes <- values(regions)
+print(attributes)
+
+# Rasterize
+rasterized_regions <- rasterize(regions, ref_raster, field="nombre", fun="min")
+
+# Plot rasterized data
+plot(rasterized_regions, col=rainbow(3))
+
+# Overlay vector data (e.g., boundaries or points)
+plot(regions, add=TRUE)
+
+writeRaster(rasterized_regions, "src/utils/rasterized_regions.tif")
diff --git a/src/scripts/LULCC/preparation/Ref_grid_creation.R b/src/scripts/LULCC/preparation/Ref_grid_creation.R
index 0540149..3f9b43f 100644
--- a/src/scripts/LULCC/preparation/Ref_grid_creation.R
+++ b/src/scripts/LULCC/preparation/Ref_grid_creation.R
@@ -1,19 +1,19 @@
########################################################################
-## Script name:
-## Purpose of script:
-## Author:
+## Script name: Ref_grid_creation
+## Purpose of script: Set up uniform spatial grid for project as reference
+## Author: Chenyu Shen
## Date Created: 2023-10-13
## Notes:
########################################################################
### =========================================================================
-### Preparation
+### A - Preparation
### =========================================================================
## Install and load packages
#vector other required packages
-packs<-c()
+packs<-c("terra","raster")
#install new packages
new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
@@ -22,5 +22,64 @@ if (length(new.packs)) install.packages(new.packs)
# Load required packages
invisible(lapply(packs, require, character.only = TRUE))
-# Source custom functions
-invisible(sapply(list.files("Scripts/Functions",
\ No newline at end of file
+# Set working directory
+setwd(Data_dir)
+
+# Load one of the LULC layers
+input_raster <- rast("LULC/Copernicus/Peru_Land_Cover_2019.tif")
+
+
+
+### =========================================================================
+### B - Extract information and remove unnecessary cells
+### =========================================================================
+
+# Extract extent, CRS, and resolution information
+extent_info <- ext(input_raster)
+crs_info <- crs(input_raster)
+res_info <- res(input_raster)
+nrows <- nrow(input_raster)
+ncols <- ncol(input_raster)
+
+
+# Create a reference grid with the same extent, CRS, and resolution
+ref_grid <- rast(extent_info, nrows = nrows, ncols = ncols, crs = crs_info, vals = NA)
+
+# Define the extent of desired region (xmin, xmax, ymin, ymax)
+desired_extent <- c(xmin = -81.81, xmax = -68.15, ymin = -18.80, ymax = 0.32)
+
+# Crop the reference grid to the extent of desired region
+cropped_gird <- crop(ref_grid, desired_extent)
+
+# Save the cropped raster
+writeRaster(cropped_gird, "ref_grid.tif", overwrite = TRUE)
+
+
+### =========================================================================
+### C - Calculate the number of pixels within the Peru boundary
+### =========================================================================
+
+# # Define the number of chunks (adjust this value as needed, more chunks for larger rasters)
+# num_chunks <- 100
+#
+# # Get the number of rows per chunk
+# rows_per_chunk <- ceiling(nrow(input_raster) / num_chunks)
+#
+# # Initialize the count
+# non_zero_pixels <- 0
+#
+# # Loop over each chunk
+# for (i in 1:num_chunks) {
+# # Define the start and end rows for the current chunk
+# start_row <- ((i-1) * rows_per_chunk) + 1
+# end_row <- min(i * rows_per_chunk, nrow(input_raster))
+#
+# # Retrieve the values for the chunk
+# chunk <- input_raster[start_row:end_row, ]
+#
+#
+# # Sum the non-zero values in the chunk and add to the total count
+# non_zero_pixels <- non_zero_pixels + sum(chunk != 0, na.rm = TRUE)
+# }
+#
+# print(non_zero_pixels)
diff --git a/src/scripts/LULCC/preparation/Write_to_excel.py b/src/scripts/LULCC/preparation/Write_to_excel.py
new file mode 100644
index 0000000..82aaad0
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Write_to_excel.py
@@ -0,0 +1,42 @@
+import os
+import pandas as pd
+
+# Set the directory where your TIF files are located
+directory_path = 'C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Soil'
+relative_path_root = 'Peru_data_collab/Preds/Raw/Soil'
+
+# List all TIF files in the directory
+tif_files = [f for f in os.listdir(directory_path) if f.endswith('.tif')]
+
+# Load existing data from the Excel file
+excel_path = "C:/Users/chshen/OneDrive - ETH Zurich/Work/predictor_table.xlsx" # Replace with the path to your Excel file
+existing_data = pd.read_excel(excel_path)
+
+# Append each TIF file path as a new row to the DataFrame
+for tif_file in tif_files:
+ # Extract the file name without the .tif extension to use as the 'Variable_name'
+ variable_name = os.path.splitext(tif_file)[0]
+
+ # Construct the relative file path
+ relative_file_path = os.path.join(relative_path_root, tif_file)
+
+ # Define the new data entry
+ new_data_entry = {
+ 'Variable_name': variable_name,
+ 'Data_citation': 'Soilgrid',
+ 'URL': 'https://maps.isric.org/',
+ 'Original_resolution': '250m',
+ 'Static_or_dynamic': 'static', # or 'dynamic' based on your data
+ 'Temporal_coverage': '',
+ 'Temporal_resolution': '',
+ 'Prepared': 'No',
+ 'Raw_data_path': relative_file_path,
+ # ... add other fields as necessary
+ }
+
+ # Convert the dictionary to a DataFrame to ensure similar structure
+ new_data_df = pd.DataFrame([new_data_entry])
+ existing_data = pd.concat([existing_data, new_data_df], ignore_index=True)
+
+# Save the updated DataFrame back to an Excel file
+existing_data.to_excel(excel_path, index=False)
diff --git a/src/scripts/preparation/Aggregate_LULC.R b/src/scripts/preparation/Aggregate_LULC.R
new file mode 100644
index 0000000..b74f7cc
--- /dev/null
+++ b/src/scripts/preparation/Aggregate_LULC.R
@@ -0,0 +1,55 @@
+########################################################################
+## Script name: Aggregate_LULC
+## Purpose of script: Aggregate the MapBiomas LULC data to 120m resolution
+## Author: Chenyu Shen
+## Date Created: 2023-11-20
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra","raster","sp")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Set output directory
+output_dir <- "LULC/MapBiomas_120m"
+
+### =========================================================================
+### B - Aggregate the Data
+### =========================================================================
+for (year in 1985:2021) {
+ # Construct file path for the input raster
+ input_path <- sprintf("LULC/MapBiomas/peru_coverage_%d.tif", year)
+
+ # Check if the file exists
+ if (!file.exists(input_path)) {
+ next # Skip to the next iteration if the file does not exist
+ }
+
+ # Load the raster
+ lulc_30m <- raster(input_path)
+
+ # Aggregate to 120m resolution
+ agg_factor <- 4 # 30m to 120m needs a factor of 4
+ lulc_120m <- aggregate(lulc_30m, fact=agg_factor, fun=modal, na.rm=TRUE)
+
+ # Construct file path for the output raster
+ output_path <- sprintf("%s/peru_coverage_120m_%d.tif", output_dir, year)
+
+ # Save the aggregated raster
+ writeRaster(lulc_120m, filename=output_path, overwrite=TRUE)
+}
\ No newline at end of file
diff --git a/src/scripts/preparation/Distance_to_water.R b/src/scripts/preparation/Distance_to_water.R
new file mode 100644
index 0000000..ec5d7d3
--- /dev/null
+++ b/src/scripts/preparation/Distance_to_water.R
@@ -0,0 +1,65 @@
+########################################################################
+## Script name: Distance_to_water
+## Purpose of script: Calculate the distance from water bodies (navigable rivers,
+## small rivers and streams, lakes and lagoons)
+## Author: Chenyu Shen
+## Date Created: 2023-11-15
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("raster", "sf", "terra")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+
+# Load the shapefiles of waterbody
+navigable_rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp")
+small_rivers <- st_read("Preds/Raw/Shapefiles/river_stream/Rios_Quebradas.shp")
+lakes_lagoons <- st_read("Preds/Raw/Shapefiles/lake_lagoon/Lagos_lagunas_Project.shp")
+
+# Load and reproject Peru's boundary to match the CRS of the water body rasters
+peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp")
+template_raster <- rast("ref_grid.tif")
+peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster))
+
+### =========================================================================
+### B - Calculate Distance
+### =========================================================================
+
+# Rasterize the Shapefiles
+raster_navigable_rivers <- rasterize(navigable_rivers, template_raster,field=1)
+raster_small_rivers <- rasterize(small_rivers, template_raster,field=1)
+raster_lakes_lagoons <- rasterize(lakes_lagoons, template_raster,field=1)
+# field=1 is used to mark the presence of the feature.
+# It's a placeholder to indicate where the features are in the raster grid
+
+# Clip the rasters to Peru's boundary
+raster_navigable_rivers_clipped <- mask(raster_navigable_rivers, peru_boundary_transformed)
+raster_small_rivers_clipped <- mask(raster_small_rivers, peru_boundary_transformed)
+raster_lakes_lagoons_clipped <- mask(raster_lakes_lagoons, peru_boundary_transformed)
+
+# Calculate Distance within Peru's boundary
+distance_navigable_rivers <- terra::distance(raster_navigable_rivers_clipped)
+distance_small_rivers <- terra::distance(raster_small_rivers_clipped)
+distance_lakes_lagoons <- terra::distance(raster_lakes_lagoons_clipped)
+
+
+# Write the output as Tiff files
+writeRaster(distance_navigable_rivers, "distance_navigable_rivers.tif")
+writeRaster(distance_small_rivers, "distance_small_rivers.tif")
+writeRaster(distance_lakes_lagoons, "distance_lakes_lagoons.tif")
diff --git a/src/scripts/preparation/Plot_LULC.R b/src/scripts/preparation/Plot_LULC.R
new file mode 100644
index 0000000..9952217
--- /dev/null
+++ b/src/scripts/preparation/Plot_LULC.R
@@ -0,0 +1,100 @@
+########################################################################
+## Script name: Plot_LULC
+## Purpose of script:Plot LULC data
+## Author: Chenyu Shen
+## Date Created: 2023-10-23
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra","raster")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Load all the LULC files
+years <- 1985:1990
+LULC_list <- list()
+
+for(year in years){
+ path <- paste0("LULC/peru_coverage_",year,".tif")
+ LULC_list[[as.character(year)]] <- rast(path)
+}
+
+
+
+my_colors <- c(
+ "3" = "#006400",
+ "4" = "#32CD32",
+ "5" = "#687537",
+ "6" = "#76A5AF",
+ "9" = "#935132",
+ "11" = "#45C2A5",
+ "12" = "#B8AF4F",
+ "13" = "#F1C232",
+ "15" = "#FFD966",
+ "18" = "#E974ED",
+ "21" = "#FFEFC3",
+ "24" = "#AA0000",
+ "30" = "#FF0000",
+ "25" = "#FF8585",
+ "33" = "#0000FF",
+ "34" = "#4FD3FF",
+ "27" = "#D5D5E5"
+)
+
+labels <- c(
+ "3" = "Forest",
+ "4" = "Dry forest",
+ "5" = "Mangrove",
+ "6" = "Flooded forest",
+ "9" = "Forest plantation",
+ "11" = "Wetland",
+ "12" = "Grasslands/herbaceous",
+ "13" = "Scrubland and other non forest formations",
+ "15" = "Pasture",
+ "18" = "Agriculture",
+ "21" = "Agricultural mosaic",
+ "24" = "Infrastructure",
+ "30" = "Mining",
+ "25" = "Other non vegetated area",
+ "33" = "River,lake and ocean",
+ "34" = "Glacier",
+ "27" = "Not observed"
+)
+
+land_cover_map <- LULC_list[["1985"]]
+
+# Get unique values using freq
+unique_vals <- freq(land_cover_map)[, 2]
+
+# Match unique values with the predefined colors and labels
+plot_colors <- my_colors[as.character(unique_vals)]
+plot_labels <- labels[as.character(unique_vals)]
+
+
+
+# 设置布局为1行2列,主要为了绘制地图和图例
+layout(matrix(1:2, nrow=1), widths=c(4, 1))
+
+# 在第一个区域绘制土地利用图
+par(mar=c(5, 5, 4, 2)) # 调整边距
+plot(land_cover_map, col=plot_colors, main=paste("Land Cover Map", year), axes=TRUE, legend=FALSE)
+
+# 在第二个区域绘制图例
+par(mar=c(5, 2, 4, 2)) # 调整边距
+plot.new()
+legend("center", legend=plot_labels, fill=plot_colors, title="Land Cover Types", cex=0.5, bty="n")
diff --git a/src/scripts/preparation/Python/.idea/.gitignore b/src/scripts/preparation/Python/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/misc.xml b/src/scripts/preparation/Python/.idea/misc.xml
new file mode 100644
index 0000000..9de2865
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/modules.xml b/src/scripts/preparation/Python/.idea/modules.xml
new file mode 100644
index 0000000..2a08ed2
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/preparation.iml b/src/scripts/preparation/Python/.idea/preparation.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/preparation.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/vcs.xml b/src/scripts/preparation/Python/.idea/vcs.xml
new file mode 100644
index 0000000..77a3cc7
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/vcs.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg
new file mode 100644
index 0000000..cd89cb9
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg
@@ -0,0 +1 @@
+ISO-8859-1
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf
new file mode 100644
index 0000000..6c144a5
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.prj b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj
new file mode 100644
index 0000000..e9ae3f0
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj
@@ -0,0 +1 @@
+PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shp b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp
new file mode 100644
index 0000000..343bbce
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shx b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx
new file mode 100644
index 0000000..59d8d95
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.cpg b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg
new file mode 100644
index 0000000..cd89cb9
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg
@@ -0,0 +1 @@
+ISO-8859-1
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.dbf b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf
new file mode 100644
index 0000000..aab7647
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.prj b/src/scripts/preparation/Python/Data/reprojected_rivers.prj
new file mode 100644
index 0000000..e9ae3f0
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_rivers.prj
@@ -0,0 +1 @@
+PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shp b/src/scripts/preparation/Python/Data/reprojected_rivers.shp
new file mode 100644
index 0000000..949be16
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shp differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shx b/src/scripts/preparation/Python/Data/reprojected_rivers.shx
new file mode 100644
index 0000000..e828388
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shx differ
diff --git a/src/scripts/preparation/Python/DistanceToWater.py b/src/scripts/preparation/Python/DistanceToWater.py
new file mode 100644
index 0000000..712056a
--- /dev/null
+++ b/src/scripts/preparation/Python/DistanceToWater.py
@@ -0,0 +1,72 @@
+import geopandas as gpd
+import rasterio
+from rasterio.features import rasterize
+from scipy.ndimage import distance_transform_edt
+import numpy as np
+from rasterio.mask import mask
+
+def calculate_distance_to_river(river_shapefile, template_raster_path, meta):
+ rivers_gdf = gpd.read_file(river_shapefile)
+ with rasterio.open(template_raster_path) as src:
+ river_raster = rasterize(
+ [(geometry, 1) for geometry in rivers_gdf.geometry],
+ out_shape=(src.height, src.width),
+ transform=src.transform,
+ fill=0,
+ all_touched=True,
+ dtype='float32'
+ )
+
+ # Compute Distance to Nearest River
+ inverse_river_raster = np.where(river_raster == 1, 0, 1)
+ distance = distance_transform_edt(inverse_river_raster) * src.res[0]
+
+ return distance
+
+# Paths to the reprojected river shapefiles
+river_shapefile = './Data/reprojected_rivers.shp'
+lagoon_shapefile = './Data/reprojected_lagoons.shp'
+template_raster_path = './Data/reprojected_raster.tif'
+
+# Extract metadata from the template raster
+with rasterio.open(template_raster_path) as src:
+ meta = src.meta.copy()
+
+# Calculate distances
+distance_river = calculate_distance_to_river(river_shapefile, template_raster_path, meta)
+distance_lagoon = calculate_distance_to_river(lagoon_shapefile, template_raster_path, meta)
+
+# Path Peru boundary shapefile
+peru_boundary_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Peru_admin_boud/per_admbnda_adm0_ign_20200714.shp")
+utm_crs = 'EPSG:24892'
+peru_boundary_utm = peru_boundary_gdf.to_crs(utm_crs)
+
+# Clip the distance rater to the boundary Peru
+def clip_raster_with_shapefile(raster_path, shapefile_path, output_raster_path):
+ # Load the boundary shapefile
+ boundary_gdf = gpd.read_file(shapefile_path)
+
+ # Load the raster to be clipped
+ with rasterio.open(raster_path) as src:
+ out_image, out_transform = mask(src, boundary_gdf.geometry, crop=True)
+ out_meta = src.meta.copy()
+
+ # Update the metadata to reflect the new raster dimensions
+ out_meta.update({"driver": "GTiff",
+ "height": out_image.shape[1],
+ "width": out_image.shape[2],
+ "transform": out_transform})
+
+ # Save the clipped raster
+ with rasterio.open(output_raster_path, 'w', **out_meta) as dest:
+ dest.write(out_image)
+
+# File paths
+clipped_river_raster = 'clipped_river_raster.tif'
+clipped_lagoon_raster = 'clipped_lagoon_raster.tif'
+
+# Clip the rasters
+clip_raster_with_shapefile(distance_river, peru_boundary_utm, clipped_river_raster)
+clip_raster_with_shapefile(distance_lagoon, peru_boundary_utm, clipped_lagoon_raster)
+
+
diff --git a/src/scripts/preparation/Python/Get_SoilGrids.py b/src/scripts/preparation/Python/Get_SoilGrids.py
new file mode 100644
index 0000000..c01638c
--- /dev/null
+++ b/src/scripts/preparation/Python/Get_SoilGrids.py
@@ -0,0 +1,48 @@
+
+########################################################################
+## Script name: Distance_to_water
+## Purpose of script: Calculate the distance from water bodies (navigable rivers,
+## small rivers and streams, lakes and lagoons)
+## Author: Chenyu Shen
+## Date Created: 2023-11-15
+## Notes:
+########################################################################
+
+
+from owslib.wcs import WebCoverageService
+import rasterio
+from rasterio import plot
+maps=['https://maps.isric.org/mapserv?map=/map/ocs.map','https://maps.isric.org/mapserv?map=/map/sand.map']
+identifiers=['ocs_0-30cm_mean','sand_0-30cm_mean']
+file_names=['./data/Peru_soil_carbon_stock_0-5_mean.tif','./data/Peru_sand_carbon_stock_0-5_mean.tif']
+for i in range(len(maps)):
+
+ map=maps[i]
+ identifier=identifiers[i]
+ file_name=file_names[i]
+
+ print(f"start download: {file_name}" )
+ wcs = WebCoverageService(map, version='1.0.0')
+
+ # A bounding box broadly matching Senegal:
+ bbox = (-81.81, -18.80, -68.15, 0.32)
+
+ # The getCoverage method can now be used to fetch the map segment within the bounding box. Note the other parameters, Section 1 showed how to obtain them.
+ response = wcs.getCoverage(
+ identifier=identifier,
+ crs='urn:ogc:def:crs:EPSG:4326',
+ bbox=bbox,
+ resx=0.002, resy=0.002,
+ format='GEOTIFF_INT16')
+
+ # create a .tif file and save data into this file
+ with open(file_name, 'wb') as file:
+ file.write(response.read())
+ print(f"download complete: {file_name}" )
+ print("="*80)
+
+ # # read map data from the created file
+ # ph = rasterio.open("./data/Peru_soil_carbon_stock_0-5_mean.tif", driver="GTiff")
+ #
+ # # display the map
+ # plot.show(ph, title='Mean pH between 0 and 5 cm deep in Senegal', cmap='gist_ncar')
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Re-projection.py b/src/scripts/preparation/Python/Re-projection.py
new file mode 100644
index 0000000..80426fc
--- /dev/null
+++ b/src/scripts/preparation/Python/Re-projection.py
@@ -0,0 +1,44 @@
+import numpy as np
+import geopandas as gpd
+import rasterio
+from rasterio.warp import calculate_default_transform, reproject, Resampling
+
+# Load and Reproject the River Shapefile
+rivers_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Rios/Rios.shp")
+lagoons_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Lagunas/Lagunas.shp")
+# Determine the appropriate UTM zone for Peru and reproject
+utm_crs = 'EPSG:24892' # South American Datum 1969 (SAD69) / Peru Central zone
+rivers_gdf_utm = rivers_gdf.to_crs(utm_crs)
+lagoons_gdf_utm = lagoons_gdf.to_crs(utm_crs)
+
+# Save the Reprojected River Shapefile
+rivers_gdf_utm.to_file('./reprojected_rivers.shp')
+lagoons_gdf_utm.to_file('./reprojected_lagoons.shp')
+
+# Load the Template Raster
+with rasterio.open("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/ref_grid.tif") as src:
+ transform, width, height = calculate_default_transform(
+ src.crs, utm_crs, src.width, src.height, *src.bounds)
+ meta = src.meta.copy()
+ meta.update({
+ 'crs': utm_crs,
+ 'transform': transform,
+ 'width': width,
+ 'height': height
+ })
+
+ # Reproject the Template Raster
+ template = np.full((height, width), np.nan, dtype=rasterio.float32)
+ reproject(
+ source=rasterio.band(src, 1),
+ destination=template,
+ src_transform=src.transform,
+ src_crs=src.crs,
+ dst_transform=transform,
+ dst_crs=utm_crs,
+ resampling=Resampling.nearest)
+
+
+# Save the Reprojected Raster
+with rasterio.open('./reprojected_raster.tif', 'w', **meta) as dst:
+ dst.write(template, 1)
diff --git a/src/scripts/preparation/Python/updated_data_processing_script.py b/src/scripts/preparation/Python/updated_data_processing_script.py
new file mode 100644
index 0000000..aea7a3d
--- /dev/null
+++ b/src/scripts/preparation/Python/updated_data_processing_script.py
@@ -0,0 +1,32 @@
+
+import pandas as pd
+import os
+
+def process_file(file_path):
+ # Extracting a prefix from the file name (assuming file name format is like 'Copper_reserves.xlsx')
+ file_name = os.path.basename(file_path)
+ prefix = ''.join([word[0] for word in file_name.split('_')]).lower()
+
+ # Load the Excel file
+ df = pd.read_excel(file_path, sheet_name=0, skiprows=4)
+
+ # Rename the first column to 'DEPARTMENT' for clarity
+ df.rename(columns={df.columns[0]: 'DEPARTMENT'}, inplace=True)
+
+ # Drop rows at the end that are part of notes and sources if any
+ df = df.dropna(subset=['DEPARTMENT'])
+
+ # Replace commas with dots and remove dashes
+ df.replace({',': '.', '-': ''}, regex=True, inplace=True)
+
+ # Abbreviate column names from the second column onwards
+ columns = df.columns.tolist()
+ abbreviated_columns = [columns[0]] + [f'{prefix}_{col}' for col in columns[1:]]
+ df.columns = abbreviated_columns
+
+ # Save the updated dataframe to CSV
+ output_csv_file_path = file_path.replace('.xlsx', '_processed.csv')
+ df.to_csv(output_csv_file_path, index=False)
+
+# Example usage
+process_file('path_to_your_excel_file.xlsx')
diff --git a/src/scripts/preparation/convert_stat_to_tif.R b/src/scripts/preparation/convert_stat_to_tif.R
new file mode 100644
index 0000000..0df5ffb
--- /dev/null
+++ b/src/scripts/preparation/convert_stat_to_tif.R
@@ -0,0 +1,50 @@
+########################################################################
+## Script name: convert_stat_to_tif
+## Purpose of script: Convert Statistics from INEI to raster files
+## Author: Chenyu Shen
+## Date Created: 2023-12-04
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("sf", "terra", "dplyr")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Load the shapefile of departments in Peru
+peru_departments <- st_read("Preds/Raw/Shapefiles/Departamento/LIM_DEPARTAMENTO.shp")
+
+# Load the population data (assuming it's a CSV file; adjust as needed)
+population_data <- read.csv("Population.csv")
+
+# Load the reference raster
+template_raster <- rast("utils/ref_grid.tif")
+
+
+### =========================================================================
+### B - Assign Population values as raster values
+### =========================================================================
+
+# Merge the shapefile with the population data
+# Ensure that both datasets have a common column for merging (e.g., department name)
+merged_data <- merge(peru_departments, population_data, by = "NOMBDEP")
+
+# Rasterize the population data
+population_raster <- terra::rasterize(merged_data, template_raster, "X2017", fun = sum)
+
+# Export as TIFF
+terra::writeRaster(population_raster, "population_map.tif", overwrite = TRUE)
diff --git a/src/scripts/preparation/smaller_extent.R b/src/scripts/preparation/smaller_extent.R
new file mode 100644
index 0000000..f435a13
--- /dev/null
+++ b/src/scripts/preparation/smaller_extent.R
@@ -0,0 +1,79 @@
+library(terra)
+library(sf)
+
+# Set working directory
+setwd(Data_dir)
+
+# Load the river shapefile with sf
+rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp")
+
+# Convert sf object to SpatVector object
+rivers_vect <- vect(rivers)
+
+# Load the reference raster
+template_raster <- rast("ref_grid.tif")
+
+# Rasterize the river shapefile
+# Create a raster where river cells are 1 and others are NA
+rivers_rasterized <- rasterize(rivers_vect, template_raster, field=1)
+
+# Load and reproject Peru's boundary to match the CRS of the water body rasters
+peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp")
+peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster))
+
+# Clip the rasters to Peru's boundary
+rivers_clipped <- mask(rivers_rasterized, peru_boundary_transformed)
+
+# Define a smaller extent
+# obtain the extent of the original raster
+rows <- nrow(rivers_rasterized)
+cols <- ncol(rivers_rasterized)
+
+# Calculate the center coordinates of the raster
+center_row <- rows / 2
+center_col <- cols / 2
+
+# Calculate the start and end rows and columns for cropping
+start_row <- center_row - 2128 / 2
+end_row <- center_row + 2128 / 2
+start_col <- center_col - 1520 / 2
+end_col <- center_col + 1520 / 2
+
+# Convert row and column indices to coordinates
+start_x <- xFromCol(rivers_rasterized, start_col)
+end_x <- xFromCol(rivers_rasterized, end_col)
+start_y <- yFromRow(rivers_rasterized, end_row)
+end_y <- yFromRow(rivers_rasterized, start_row)
+
+# Create the extent
+small_extent <- terra::ext(start_x, end_x, start_y, end_y)
+
+# Crop the raster
+small_raster <- crop(rivers_rasterized, small_extent)
+
+# Calculate the distance from each cell in the template raster to the nearest river
+distance_to_river <- distance(small_raster)
+
+# Basic plot with terra
+plot(distance_to_river,
+ main="Distance to River",
+ col=viridis::viridis(100),
+ xlab="Longitude", ylab="Latitude")
+
+# Advanced version using ggplot
+
+library(ggplot2)
+library(viridis)
+
+# Convert raster to a data frame for ggplot
+raster_df <- as.data.frame(distance_to_river, xy=TRUE)
+
+# Use ggplot to plot
+ggplot(raster_df, aes(x = x, y = y, fill = layer)) +
+ geom_tile() +
+ scale_fill_viridis(name="Distance to River", option="D") +
+ labs(title="Distance to River", x="Longitude", y="Latitude") +
+ coord_fixed()
+
+# Write the output as Tiff files
+writeRaster(distance_to_river, "distance_to_river.tif")
diff --git a/src/utils/Geographical_Region/region-geografica.dbf b/src/utils/Geographical_Region/region-geografica.dbf
new file mode 100644
index 0000000..4cbc0d7
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.dbf differ
diff --git a/src/utils/Geographical_Region/region-geografica.prj b/src/utils/Geographical_Region/region-geografica.prj
new file mode 100644
index 0000000..3867261
--- /dev/null
+++ b/src/utils/Geographical_Region/region-geografica.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_SIRGAS_2000",DATUM["D_SIRGAS_2000",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
diff --git a/src/utils/Geographical_Region/region-geografica.shp b/src/utils/Geographical_Region/region-geografica.shp
new file mode 100644
index 0000000..392a3e4
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shp differ
diff --git a/src/utils/Geographical_Region/region-geografica.shx b/src/utils/Geographical_Region/region-geografica.shx
new file mode 100644
index 0000000..42717aa
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shx differ
diff --git a/src/utils/predictor_table.xlsx b/src/utils/predictor_table.xlsx
index e84a446..7e49921 100644
Binary files a/src/utils/predictor_table.xlsx and b/src/utils/predictor_table.xlsx differ