diff --git a/src/scripts/LULCC/preparation/Get_SoilGrids.py b/src/scripts/LULCC/preparation/Get_SoilGrids.py
new file mode 100644
index 0000000..78de84f
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Get_SoilGrids.py
@@ -0,0 +1,70 @@
+from owslib.wcs import WebCoverageService
+import rasterio
+import matplotlib.pyplot as plt
+from rasterio import plot
+
+# WCS URLs for different soil properties
+wcs_urls = {
+    'phh2o': 'http://maps.isric.org/mapserv?map=/map/phh2o.map',
+    'bdod': 'http://maps.isric.org/mapserv?map=/map/bdod.map',
+    'cec': 'http://maps.isric.org/mapserv?map=/map/cec.map',
+    'cfvo': 'https://maps.isric.org/mapserv?map=/map/cfvo.map',
+    'clay': 'https://maps.isric.org/mapserv?map=/map/clay.map',
+    'nitrogen': 'https://maps.isric.org/mapserv?map=/map/nitrogen.map',
+    'sand': 'https://maps.isric.org/mapserv?map=/map/sand.map',
+    'silt': 'https://maps.isric.org/mapserv?map=/map/silt.map',
+    'soc': 'https://maps.isric.org/mapserv?map=/map/soc.map',
+<<<<<<< HEAD
+=======
+    'ocs': 'https://maps.isric.org/mapserv?map=/map/ocs.map',
+>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5
+    'ocd': 'https://maps.isric.org/mapserv?map=/map/ocd.map'
+
+    # Add other WCS URLs here
+    # ...
+}
+
+# Soil properties
+<<<<<<< HEAD
+soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd']
+=======
+soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd', 'ocs']
+>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5
+
+# Depth intervals
+depth_intervals = ['0-5cm', '5-15cm', '15-30cm', '30-60cm', '60-100cm', '100-200cm']
+
+# A bounding box broadly matching Peru
+bbox = (-81.81, -18.80, -68.15, 0.32)
+
+for property in soil_properties:
+    wcs_url = wcs_urls.get(property, '')
+    if wcs_url:
+        wcs = WebCoverageService(wcs_url, version='1.0.0')
+
+        for depth in depth_intervals:
+            identifier = f'{property}_{depth}_mean'
+            file_name = f'./data/{property}_{depth}_mean.tif'
+            title = f'Mean {property} between {depth} deep in Peru'
+
+            # Fetch the map segment within the bounding box
+            response = wcs.getCoverage(
+                identifier=identifier,
+                crs='urn:ogc:def:crs:EPSG:4326',
+                bbox=bbox,
+                resx=0.002, resy=0.002,
+                format='GEOTIFF_INT16')
+
+            # Fetch the coverage for Peru and save it to disk
+            with open(file_name, 'wb') as file:
+                file.write(response.read())
+                print(f"download complete: {file_name}")
+
+            # Open the file from disk
+            ph = rasterio.open(file_name, driver="GTiff")
+
+            # Use the plot class to plot the map
+            plot.show(ph, title=title, cmap='gist_ncar')
+            plt.show()
+    else:
+        print(f"WCS URL for {property} not defined.")
diff --git a/src/scripts/LULCC/preparation/Rasterize_shapefile.R b/src/scripts/LULCC/preparation/Rasterize_shapefile.R
new file mode 100644
index 0000000..44602f2
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Rasterize_shapefile.R
@@ -0,0 +1,66 @@
+########################################################################
+## Script name: Rasterize_shapefile
+## Purpose of script: rasterize the shapefils of three geographical regions in Peru  
+## Author: Chenyu Shen
+## Date Created: 2023-10-27
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd("C:/Work/Peru-env-futures")
+
+# Read in the shapefile
+regions <- vect("src/utils/Geographical_Region/region-geografica.shp")
+
+# Read in the reference raster
+ref_raster <- rast("src/utils/ref_grid.tif")
+
+
+### =========================================================================
+### B - Check and Transform CRS
+### =========================================================================
+
+# Check CRS of both files
+crs_regions <- crs(regions)
+crs_ref <- crs(ref_raster)
+
+# Transform if necessary
+if (crs_regions != crs_ref) {
+  regions <- project(regions, crs_ref)
+}
+
+
+### =========================================================================
+### C - Rasterize the shapefile
+### =========================================================================
+
+# Access and view the attibutes table of the shapefile
+attributes <- values(regions)
+print(attributes)
+
+# Rasterize
+rasterized_regions <- rasterize(regions, ref_raster, field="nombre", fun="min")
+
+# Plot rasterized data
+plot(rasterized_regions, col=rainbow(3))
+
+# Overlay vector data (e.g., boundaries or points)
+plot(regions, add=TRUE)
+
+writeRaster(rasterized_regions, "src/utils/rasterized_regions.tif")
diff --git a/src/scripts/LULCC/preparation/Ref_grid_creation.R b/src/scripts/LULCC/preparation/Ref_grid_creation.R
index 0540149..3f9b43f 100644
--- a/src/scripts/LULCC/preparation/Ref_grid_creation.R
+++ b/src/scripts/LULCC/preparation/Ref_grid_creation.R
@@ -1,19 +1,19 @@
 ########################################################################
-## Script name: 
-## Purpose of script:
-## Author: 
+## Script name: Ref_grid_creation
+## Purpose of script: Set up uniform spatial grid for project as reference 
+## Author: Chenyu Shen
 ## Date Created: 2023-10-13
 ## Notes:
 ########################################################################
 
 ### =========================================================================
-### Preparation
+### A - Preparation
 ### =========================================================================
 
 ## Install and load packages
 
 #vector other required packages
-packs<-c()
+packs<-c("terra","raster")
 
 #install new packages
 new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
@@ -22,5 +22,64 @@ if (length(new.packs)) install.packages(new.packs)
 # Load required packages
 invisible(lapply(packs, require, character.only = TRUE))
 
-# Source custom functions
-invisible(sapply(list.files("Scripts/Functions",
\ No newline at end of file
+# Set working directory
+setwd(Data_dir)
+
+# Load one of the LULC layers
+input_raster <- rast("LULC/Copernicus/Peru_Land_Cover_2019.tif")
+
+
+
+### =========================================================================
+### B - Extract information and remove unnecessary cells
+### =========================================================================
+
+# Extract extent, CRS, and resolution information
+extent_info <- ext(input_raster)
+crs_info <- crs(input_raster)
+res_info <- res(input_raster)
+nrows <- nrow(input_raster)
+ncols <- ncol(input_raster)
+
+
+# Create a reference grid with the same extent, CRS, and resolution
+ref_grid <- rast(extent_info, nrows = nrows, ncols = ncols, crs = crs_info, vals = NA)
+
+# Define the extent of desired region (xmin, xmax, ymin, ymax)
+desired_extent <- c(xmin = -81.81, xmax = -68.15, ymin = -18.80, ymax = 0.32)
+
+# Crop the reference grid to the extent of desired region
+cropped_gird <- crop(ref_grid, desired_extent)
+
+# Save the cropped raster 
+writeRaster(cropped_gird, "ref_grid.tif", overwrite = TRUE)
+
+
+### =========================================================================
+### C - Calculate the number of pixels within the Peru boundary 
+### =========================================================================
+
+# # Define the number of chunks (adjust this value as needed, more chunks for larger rasters)
+# num_chunks <- 100
+# 
+# # Get the number of rows per chunk
+# rows_per_chunk <- ceiling(nrow(input_raster) / num_chunks)
+# 
+# # Initialize the count
+# non_zero_pixels <- 0
+# 
+# # Loop over each chunk
+# for (i in 1:num_chunks) {
+#   # Define the start and end rows for the current chunk
+#   start_row <- ((i-1) * rows_per_chunk) + 1
+#   end_row <- min(i * rows_per_chunk, nrow(input_raster))
+#   
+#   # Retrieve the values for the chunk
+#   chunk <- input_raster[start_row:end_row, ]
+#   
+#     
+#   # Sum the non-zero values in the chunk and add to the total count
+#   non_zero_pixels <- non_zero_pixels + sum(chunk != 0, na.rm = TRUE)
+# }
+# 
+# print(non_zero_pixels)
diff --git a/src/scripts/LULCC/preparation/Write_to_excel.py b/src/scripts/LULCC/preparation/Write_to_excel.py
new file mode 100644
index 0000000..82aaad0
--- /dev/null
+++ b/src/scripts/LULCC/preparation/Write_to_excel.py
@@ -0,0 +1,42 @@
+import os
+import pandas as pd
+
+# Set the directory where your TIF files are located
+directory_path = 'C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Soil'
+relative_path_root = 'Peru_data_collab/Preds/Raw/Soil'
+
+# List all TIF files in the directory
+tif_files = [f for f in os.listdir(directory_path) if f.endswith('.tif')]
+
+# Load existing data from the Excel file
+excel_path = "C:/Users/chshen/OneDrive - ETH Zurich/Work/predictor_table.xlsx"  # Replace with the path to your Excel file
+existing_data = pd.read_excel(excel_path)
+
+# Append each TIF file path as a new row to the DataFrame
+for tif_file in tif_files:
+    # Extract the file name without the .tif extension to use as the 'Variable_name'
+    variable_name = os.path.splitext(tif_file)[0]
+
+    # Construct the relative file path
+    relative_file_path = os.path.join(relative_path_root, tif_file)
+
+    # Define the new data entry
+    new_data_entry = {
+        'Variable_name': variable_name,
+        'Data_citation': 'Soilgrid',
+        'URL': 'https://maps.isric.org/',
+        'Original_resolution': '250m',
+        'Static_or_dynamic': 'static',  # or 'dynamic' based on your data
+        'Temporal_coverage': '',
+        'Temporal_resolution': '',
+        'Prepared': 'No',
+        'Raw_data_path': relative_file_path,
+        # ... add other fields as necessary
+    }
+
+    # Convert the dictionary to a DataFrame to ensure similar structure
+    new_data_df = pd.DataFrame([new_data_entry])
+    existing_data = pd.concat([existing_data, new_data_df], ignore_index=True)
+
+# Save the updated DataFrame back to an Excel file
+existing_data.to_excel(excel_path, index=False)
diff --git a/src/scripts/preparation/Aggregate_LULC.R b/src/scripts/preparation/Aggregate_LULC.R
new file mode 100644
index 0000000..b74f7cc
--- /dev/null
+++ b/src/scripts/preparation/Aggregate_LULC.R
@@ -0,0 +1,55 @@
+########################################################################
+## Script name: Aggregate_LULC
+## Purpose of script: Aggregate the MapBiomas LULC data to 120m resolution
+## Author: Chenyu Shen
+## Date Created: 2023-11-20
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra","raster","sp")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Set output directory
+output_dir <- "LULC/MapBiomas_120m"
+
+### =========================================================================
+### B - Aggregate the Data
+### =========================================================================
+for (year in 1985:2021) {
+  # Construct file path for the input raster
+  input_path <- sprintf("LULC/MapBiomas/peru_coverage_%d.tif", year)
+  
+  # Check if the file exists
+  if (!file.exists(input_path)) {
+    next  # Skip to the next iteration if the file does not exist
+  }
+  
+  # Load the raster
+  lulc_30m <- raster(input_path)
+  
+  # Aggregate to 120m resolution
+  agg_factor <- 4  # 30m to 120m needs a factor of 4
+  lulc_120m <- aggregate(lulc_30m, fact=agg_factor, fun=modal, na.rm=TRUE)
+  
+  # Construct file path for the output raster
+  output_path <- sprintf("%s/peru_coverage_120m_%d.tif", output_dir, year)
+  
+  # Save the aggregated raster
+  writeRaster(lulc_120m, filename=output_path, overwrite=TRUE)
+}
\ No newline at end of file
diff --git a/src/scripts/preparation/Distance_to_water.R b/src/scripts/preparation/Distance_to_water.R
new file mode 100644
index 0000000..ec5d7d3
--- /dev/null
+++ b/src/scripts/preparation/Distance_to_water.R
@@ -0,0 +1,65 @@
+########################################################################
+## Script name: Distance_to_water
+## Purpose of script: Calculate the distance from water bodies (navigable rivers, 
+## small rivers and streams, lakes and lagoons)
+## Author: Chenyu Shen
+## Date Created: 2023-11-15
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("raster", "sf", "terra")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+
+# Load the shapefiles of waterbody
+navigable_rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp")
+small_rivers <- st_read("Preds/Raw/Shapefiles/river_stream/Rios_Quebradas.shp")
+lakes_lagoons <- st_read("Preds/Raw/Shapefiles/lake_lagoon/Lagos_lagunas_Project.shp")
+
+# Load and reproject Peru's boundary to match the CRS of the water body rasters
+peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp")
+template_raster <- rast("ref_grid.tif")
+peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster))
+
+### =========================================================================
+### B - Calculate Distance
+### =========================================================================
+
+# Rasterize the Shapefiles
+raster_navigable_rivers <- rasterize(navigable_rivers, template_raster,field=1)
+raster_small_rivers <- rasterize(small_rivers, template_raster,field=1)
+raster_lakes_lagoons <- rasterize(lakes_lagoons, template_raster,field=1)  
+# field=1 is used to mark the presence of the feature. 
+# It's a placeholder to indicate where the features are in the raster grid
+
+# Clip the rasters to Peru's boundary
+raster_navigable_rivers_clipped <- mask(raster_navigable_rivers, peru_boundary_transformed)
+raster_small_rivers_clipped <- mask(raster_small_rivers, peru_boundary_transformed)
+raster_lakes_lagoons_clipped <- mask(raster_lakes_lagoons, peru_boundary_transformed)
+
+# Calculate Distance within Peru's boundary
+distance_navigable_rivers <- terra::distance(raster_navigable_rivers_clipped)
+distance_small_rivers <- terra::distance(raster_small_rivers_clipped)
+distance_lakes_lagoons <- terra::distance(raster_lakes_lagoons_clipped)
+
+
+# Write the output as Tiff files
+writeRaster(distance_navigable_rivers, "distance_navigable_rivers.tif")
+writeRaster(distance_small_rivers, "distance_small_rivers.tif")
+writeRaster(distance_lakes_lagoons, "distance_lakes_lagoons.tif")
diff --git a/src/scripts/preparation/Plot_LULC.R b/src/scripts/preparation/Plot_LULC.R
new file mode 100644
index 0000000..9952217
--- /dev/null
+++ b/src/scripts/preparation/Plot_LULC.R
@@ -0,0 +1,100 @@
+########################################################################
+## Script name: Plot_LULC
+## Purpose of script:Plot LULC data
+## Author: Chenyu Shen
+## Date Created: 2023-10-23
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("terra","raster")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Load all the LULC files
+years <- 1985:1990
+LULC_list <- list()
+
+for(year in years){
+  path <- paste0("LULC/peru_coverage_",year,".tif")
+  LULC_list[[as.character(year)]] <- rast(path)
+}
+
+
+
+my_colors <- c(
+  "3" = "#006400",
+  "4" = "#32CD32",
+  "5" = "#687537",
+  "6" = "#76A5AF",
+  "9" = "#935132",
+  "11" = "#45C2A5",
+  "12" = "#B8AF4F",
+  "13" = "#F1C232",
+  "15" = "#FFD966",
+  "18" = "#E974ED",
+  "21" = "#FFEFC3",
+  "24" = "#AA0000",
+  "30" = "#FF0000",
+  "25" = "#FF8585",
+  "33" = "#0000FF",
+  "34" = "#4FD3FF",
+  "27" = "#D5D5E5"
+)
+
+labels <- c(
+  "3" = "Forest",
+  "4" = "Dry forest",
+  "5" = "Mangrove",
+  "6" = "Flooded forest",
+  "9" = "Forest plantation",
+  "11" = "Wetland",
+  "12" = "Grasslands/herbaceous",
+  "13" = "Scrubland and other non forest formations",
+  "15" = "Pasture",
+  "18" = "Agriculture",
+  "21" = "Agricultural mosaic",
+  "24" = "Infrastructure",
+  "30" = "Mining",
+  "25" = "Other non vegetated area",
+  "33" = "River,lake and ocean",
+  "34" = "Glacier",
+  "27" = "Not observed"
+)
+
+land_cover_map <- LULC_list[["1985"]]
+
+# Get unique values using freq
+unique_vals <- freq(land_cover_map)[, 2]
+
+# Match unique values with the predefined colors and labels
+plot_colors <- my_colors[as.character(unique_vals)]
+plot_labels <- labels[as.character(unique_vals)]
+
+
+
+# 设置布局为1行2列，主要为了绘制地图和图例
+layout(matrix(1:2, nrow=1), widths=c(4, 1))
+
+# 在第一个区域绘制土地利用图
+par(mar=c(5, 5, 4, 2)) # 调整边距
+plot(land_cover_map, col=plot_colors, main=paste("Land Cover Map", year), axes=TRUE, legend=FALSE)
+
+# 在第二个区域绘制图例
+par(mar=c(5, 2, 4, 2)) # 调整边距
+plot.new()
+legend("center", legend=plot_labels, fill=plot_colors, title="Land Cover Types", cex=0.5, bty="n")
diff --git a/src/scripts/preparation/Python/.idea/.gitignore b/src/scripts/preparation/Python/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/misc.xml b/src/scripts/preparation/Python/.idea/misc.xml
new file mode 100644
index 0000000..9de2865
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.10" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/modules.xml b/src/scripts/preparation/Python/.idea/modules.xml
new file mode 100644
index 0000000..2a08ed2
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/preparation.iml" filepath="$PROJECT_DIR$/.idea/preparation.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/preparation.iml b/src/scripts/preparation/Python/.idea/preparation.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/preparation.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/.idea/vcs.xml b/src/scripts/preparation/Python/.idea/vcs.xml
new file mode 100644
index 0000000..77a3cc7
--- /dev/null
+++ b/src/scripts/preparation/Python/.idea/vcs.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../../../.." vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg
new file mode 100644
index 0000000..cd89cb9
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg
@@ -0,0 +1 @@
+ISO-8859-1
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf
new file mode 100644
index 0000000..6c144a5
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.prj b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj
new file mode 100644
index 0000000..e9ae3f0
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj
@@ -0,0 +1 @@
+PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shp b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp
new file mode 100644
index 0000000..343bbce
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shx b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx
new file mode 100644
index 0000000..59d8d95
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.cpg b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg
new file mode 100644
index 0000000..cd89cb9
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg
@@ -0,0 +1 @@
+ISO-8859-1
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.dbf b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf
new file mode 100644
index 0000000..aab7647
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.prj b/src/scripts/preparation/Python/Data/reprojected_rivers.prj
new file mode 100644
index 0000000..e9ae3f0
--- /dev/null
+++ b/src/scripts/preparation/Python/Data/reprojected_rivers.prj
@@ -0,0 +1 @@
+PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]]
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shp b/src/scripts/preparation/Python/Data/reprojected_rivers.shp
new file mode 100644
index 0000000..949be16
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shp differ
diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shx b/src/scripts/preparation/Python/Data/reprojected_rivers.shx
new file mode 100644
index 0000000..e828388
Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shx differ
diff --git a/src/scripts/preparation/Python/DistanceToWater.py b/src/scripts/preparation/Python/DistanceToWater.py
new file mode 100644
index 0000000..712056a
--- /dev/null
+++ b/src/scripts/preparation/Python/DistanceToWater.py
@@ -0,0 +1,72 @@
+import geopandas as gpd
+import rasterio
+from rasterio.features import rasterize
+from scipy.ndimage import distance_transform_edt
+import numpy as np
+from rasterio.mask import mask
+
+def calculate_distance_to_river(river_shapefile, template_raster_path, meta):
+    rivers_gdf = gpd.read_file(river_shapefile)
+    with rasterio.open(template_raster_path) as src:
+        river_raster = rasterize(
+            [(geometry, 1) for geometry in rivers_gdf.geometry],
+            out_shape=(src.height, src.width),
+            transform=src.transform,
+            fill=0,
+            all_touched=True,
+            dtype='float32'
+        )
+
+        # Compute Distance to Nearest River
+        inverse_river_raster = np.where(river_raster == 1, 0, 1)
+        distance = distance_transform_edt(inverse_river_raster) * src.res[0]
+
+        return distance
+
+# Paths to the reprojected river shapefiles
+river_shapefile = './Data/reprojected_rivers.shp'
+lagoon_shapefile = './Data/reprojected_lagoons.shp'
+template_raster_path = './Data/reprojected_raster.tif'
+
+# Extract metadata from the template raster
+with rasterio.open(template_raster_path) as src:
+    meta = src.meta.copy()
+
+# Calculate distances
+distance_river = calculate_distance_to_river(river_shapefile, template_raster_path, meta)
+distance_lagoon = calculate_distance_to_river(lagoon_shapefile, template_raster_path, meta)
+
+# Path Peru boundary shapefile
+peru_boundary_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Peru_admin_boud/per_admbnda_adm0_ign_20200714.shp")
+utm_crs = 'EPSG:24892'
+peru_boundary_utm = peru_boundary_gdf.to_crs(utm_crs)
+
+# Clip the distance rater to the boundary Peru
+def clip_raster_with_shapefile(raster_path, shapefile_path, output_raster_path):
+    # Load the boundary shapefile
+    boundary_gdf = gpd.read_file(shapefile_path)
+
+    # Load the raster to be clipped
+    with rasterio.open(raster_path) as src:
+        out_image, out_transform = mask(src, boundary_gdf.geometry, crop=True)
+        out_meta = src.meta.copy()
+
+        # Update the metadata to reflect the new raster dimensions
+        out_meta.update({"driver": "GTiff",
+                         "height": out_image.shape[1],
+                         "width": out_image.shape[2],
+                         "transform": out_transform})
+
+        # Save the clipped raster
+        with rasterio.open(output_raster_path, 'w', **out_meta) as dest:
+            dest.write(out_image)
+
+# File paths
+clipped_river_raster = 'clipped_river_raster.tif'
+clipped_lagoon_raster = 'clipped_lagoon_raster.tif'
+
+# Clip the rasters
+clip_raster_with_shapefile(distance_river, peru_boundary_utm, clipped_river_raster)
+clip_raster_with_shapefile(distance_lagoon, peru_boundary_utm, clipped_lagoon_raster)
+
+
diff --git a/src/scripts/preparation/Python/Get_SoilGrids.py b/src/scripts/preparation/Python/Get_SoilGrids.py
new file mode 100644
index 0000000..c01638c
--- /dev/null
+++ b/src/scripts/preparation/Python/Get_SoilGrids.py
@@ -0,0 +1,48 @@
+
+########################################################################
+## Script name: Distance_to_water
+## Purpose of script: Calculate the distance from water bodies (navigable rivers,
+## small rivers and streams, lakes and lagoons)
+## Author: Chenyu Shen
+## Date Created: 2023-11-15
+## Notes:
+########################################################################
+
+
+from owslib.wcs import WebCoverageService
+import rasterio
+from rasterio import plot
+maps=['https://maps.isric.org/mapserv?map=/map/ocs.map','https://maps.isric.org/mapserv?map=/map/sand.map']
+identifiers=['ocs_0-30cm_mean','sand_0-30cm_mean']
+file_names=['./data/Peru_soil_carbon_stock_0-5_mean.tif','./data/Peru_sand_carbon_stock_0-5_mean.tif']
+for i in range(len(maps)):
+
+    map=maps[i]
+    identifier=identifiers[i]
+    file_name=file_names[i]
+
+    print(f"start download: {file_name}" )
+    wcs = WebCoverageService(map, version='1.0.0')
+
+    # A bounding box broadly matching Senegal:
+    bbox = (-81.81, -18.80, -68.15, 0.32)
+
+    # The getCoverage method can now be used to fetch the map segment within the bounding box. Note the other parameters, Section 1 showed how to obtain them.
+    response = wcs.getCoverage(
+        identifier=identifier,
+        crs='urn:ogc:def:crs:EPSG:4326',
+        bbox=bbox,
+        resx=0.002, resy=0.002,
+        format='GEOTIFF_INT16')
+
+    # create  a .tif file and save data into this file
+    with open(file_name, 'wb') as file:
+        file.write(response.read())
+    print(f"download complete: {file_name}" )
+    print("="*80)
+
+    # # read map data from the created file
+    # ph = rasterio.open("./data/Peru_soil_carbon_stock_0-5_mean.tif", driver="GTiff")
+    #
+    # # display the map
+    # plot.show(ph, title='Mean pH between 0 and 5 cm deep in Senegal', cmap='gist_ncar')
\ No newline at end of file
diff --git a/src/scripts/preparation/Python/Re-projection.py b/src/scripts/preparation/Python/Re-projection.py
new file mode 100644
index 0000000..80426fc
--- /dev/null
+++ b/src/scripts/preparation/Python/Re-projection.py
@@ -0,0 +1,44 @@
+import numpy as np
+import geopandas as gpd
+import rasterio
+from rasterio.warp import calculate_default_transform, reproject, Resampling
+
+# Load and Reproject the River Shapefile
+rivers_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Rios/Rios.shp")
+lagoons_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Lagunas/Lagunas.shp")
+# Determine the appropriate UTM zone for Peru and reproject
+utm_crs = 'EPSG:24892'  # South American Datum 1969 (SAD69) / Peru Central zone
+rivers_gdf_utm = rivers_gdf.to_crs(utm_crs)
+lagoons_gdf_utm = lagoons_gdf.to_crs(utm_crs)
+
+# Save the Reprojected River Shapefile
+rivers_gdf_utm.to_file('./reprojected_rivers.shp')
+lagoons_gdf_utm.to_file('./reprojected_lagoons.shp')
+
+# Load the Template Raster
+with rasterio.open("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/ref_grid.tif") as src:
+    transform, width, height = calculate_default_transform(
+        src.crs, utm_crs, src.width, src.height, *src.bounds)
+    meta = src.meta.copy()
+    meta.update({
+        'crs': utm_crs,
+        'transform': transform,
+        'width': width,
+        'height': height
+    })
+
+    # Reproject the Template Raster
+    template = np.full((height, width), np.nan, dtype=rasterio.float32)
+    reproject(
+        source=rasterio.band(src, 1),
+        destination=template,
+        src_transform=src.transform,
+        src_crs=src.crs,
+        dst_transform=transform,
+        dst_crs=utm_crs,
+        resampling=Resampling.nearest)
+
+
+# Save the Reprojected Raster
+with rasterio.open('./reprojected_raster.tif', 'w', **meta) as dst:
+    dst.write(template, 1)
diff --git a/src/scripts/preparation/Python/updated_data_processing_script.py b/src/scripts/preparation/Python/updated_data_processing_script.py
new file mode 100644
index 0000000..aea7a3d
--- /dev/null
+++ b/src/scripts/preparation/Python/updated_data_processing_script.py
@@ -0,0 +1,32 @@
+
+import pandas as pd
+import os
+
+def process_file(file_path):
+    # Extracting a prefix from the file name (assuming file name format is like 'Copper_reserves.xlsx')
+    file_name = os.path.basename(file_path)
+    prefix = ''.join([word[0] for word in file_name.split('_')]).lower()
+
+    # Load the Excel file
+    df = pd.read_excel(file_path, sheet_name=0, skiprows=4)
+
+    # Rename the first column to 'DEPARTMENT' for clarity
+    df.rename(columns={df.columns[0]: 'DEPARTMENT'}, inplace=True)
+
+    # Drop rows at the end that are part of notes and sources if any
+    df = df.dropna(subset=['DEPARTMENT'])
+
+    # Replace commas with dots and remove dashes
+    df.replace({',': '.', '-': ''}, regex=True, inplace=True)
+
+    # Abbreviate column names from the second column onwards
+    columns = df.columns.tolist()
+    abbreviated_columns = [columns[0]] + [f'{prefix}_{col}' for col in columns[1:]]
+    df.columns = abbreviated_columns
+
+    # Save the updated dataframe to CSV
+    output_csv_file_path = file_path.replace('.xlsx', '_processed.csv')
+    df.to_csv(output_csv_file_path, index=False)
+
+# Example usage
+process_file('path_to_your_excel_file.xlsx')
diff --git a/src/scripts/preparation/convert_stat_to_tif.R b/src/scripts/preparation/convert_stat_to_tif.R
new file mode 100644
index 0000000..0df5ffb
--- /dev/null
+++ b/src/scripts/preparation/convert_stat_to_tif.R
@@ -0,0 +1,50 @@
+########################################################################
+## Script name: convert_stat_to_tif
+## Purpose of script: Convert Statistics from INEI to raster files
+## Author: Chenyu Shen 
+## Date Created: 2023-12-04
+## Notes:
+########################################################################
+
+### =========================================================================
+### A - Preparation
+### =========================================================================
+
+## Install and load packages
+
+#vector other required packages
+packs<-c("sf", "terra", "dplyr")
+
+#install new packages
+new.packs <- packs[!(packs %in% installed.packages()[, "Package"])]
+if (length(new.packs)) install.packages(new.packs)
+
+# Load required packages
+invisible(lapply(packs, require, character.only = TRUE))
+
+# Set working directory
+setwd(Data_dir)
+
+# Load the shapefile of departments in Peru 
+peru_departments <- st_read("Preds/Raw/Shapefiles/Departamento/LIM_DEPARTAMENTO.shp")
+
+# Load the population data (assuming it's a CSV file; adjust as needed)
+population_data <- read.csv("Population.csv")
+
+# Load the reference raster
+template_raster <- rast("utils/ref_grid.tif")
+
+
+### =========================================================================
+### B - Assign Population values as raster values
+### =========================================================================
+
+# Merge the shapefile with the population data
+# Ensure that both datasets have a common column for merging (e.g., department name)
+merged_data <- merge(peru_departments, population_data, by = "NOMBDEP")
+
+# Rasterize the population data
+population_raster <- terra::rasterize(merged_data, template_raster, "X2017", fun = sum)
+
+# Export as TIFF
+terra::writeRaster(population_raster, "population_map.tif", overwrite = TRUE)
diff --git a/src/scripts/preparation/smaller_extent.R b/src/scripts/preparation/smaller_extent.R
new file mode 100644
index 0000000..f435a13
--- /dev/null
+++ b/src/scripts/preparation/smaller_extent.R
@@ -0,0 +1,79 @@
+library(terra)
+library(sf)
+
+# Set working directory
+setwd(Data_dir)
+
+# Load the river shapefile with sf
+rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp")
+
+# Convert sf object to SpatVector object
+rivers_vect <- vect(rivers)
+
+# Load the reference raster
+template_raster <- rast("ref_grid.tif")
+
+# Rasterize the river shapefile
+# Create a raster where river cells are 1 and others are NA
+rivers_rasterized <- rasterize(rivers_vect, template_raster, field=1)
+
+# Load and reproject Peru's boundary to match the CRS of the water body rasters
+peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp")
+peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster))
+
+# Clip the rasters to Peru's boundary
+rivers_clipped <- mask(rivers_rasterized, peru_boundary_transformed)
+
+# Define a smaller extent 
+# obtain the extent of the original raster 
+rows <- nrow(rivers_rasterized)
+cols <- ncol(rivers_rasterized)
+
+# Calculate the center coordinates of the raster
+center_row <- rows / 2
+center_col <- cols / 2
+
+# Calculate the start and end rows and columns for cropping
+start_row <- center_row - 2128 / 2
+end_row <- center_row + 2128 / 2
+start_col <- center_col - 1520 / 2  
+end_col <- center_col + 1520 / 2
+
+# Convert row and column indices to coordinates
+start_x <- xFromCol(rivers_rasterized, start_col)
+end_x <- xFromCol(rivers_rasterized, end_col)
+start_y <- yFromRow(rivers_rasterized, end_row)
+end_y <- yFromRow(rivers_rasterized, start_row)
+
+# Create the extent
+small_extent <- terra::ext(start_x, end_x, start_y, end_y)
+
+# Crop the raster
+small_raster <- crop(rivers_rasterized, small_extent)
+
+# Calculate the distance from each cell in the template raster to the nearest river
+distance_to_river <- distance(small_raster)
+
+# Basic plot with terra
+plot(distance_to_river, 
+     main="Distance to River", 
+     col=viridis::viridis(100), 
+     xlab="Longitude", ylab="Latitude")
+
+# Advanced version using ggplot
+
+library(ggplot2)
+library(viridis)
+
+# Convert raster to a data frame for ggplot
+raster_df <- as.data.frame(distance_to_river, xy=TRUE)
+
+# Use ggplot to plot
+ggplot(raster_df, aes(x = x, y = y, fill = layer)) +
+  geom_tile() +
+  scale_fill_viridis(name="Distance to River", option="D") +
+  labs(title="Distance to River", x="Longitude", y="Latitude") +
+  coord_fixed()
+
+# Write the output as Tiff files
+writeRaster(distance_to_river, "distance_to_river.tif")
diff --git a/src/utils/Geographical_Region/region-geografica.dbf b/src/utils/Geographical_Region/region-geografica.dbf
new file mode 100644
index 0000000..4cbc0d7
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.dbf differ
diff --git a/src/utils/Geographical_Region/region-geografica.prj b/src/utils/Geographical_Region/region-geografica.prj
new file mode 100644
index 0000000..3867261
--- /dev/null
+++ b/src/utils/Geographical_Region/region-geografica.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_SIRGAS_2000",DATUM["D_SIRGAS_2000",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
diff --git a/src/utils/Geographical_Region/region-geografica.shp b/src/utils/Geographical_Region/region-geografica.shp
new file mode 100644
index 0000000..392a3e4
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shp differ
diff --git a/src/utils/Geographical_Region/region-geografica.shx b/src/utils/Geographical_Region/region-geografica.shx
new file mode 100644
index 0000000..42717aa
Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shx differ
diff --git a/src/utils/predictor_table.xlsx b/src/utils/predictor_table.xlsx
index e84a446..7e49921 100644
Binary files a/src/utils/predictor_table.xlsx and b/src/utils/predictor_table.xlsx differ