diff --git a/src/scripts/LULCC/preparation/Get_SoilGrids.py b/src/scripts/LULCC/preparation/Get_SoilGrids.py new file mode 100644 index 0000000..78de84f --- /dev/null +++ b/src/scripts/LULCC/preparation/Get_SoilGrids.py @@ -0,0 +1,70 @@ +from owslib.wcs import WebCoverageService +import rasterio +import matplotlib.pyplot as plt +from rasterio import plot + +# WCS URLs for different soil properties +wcs_urls = { + 'phh2o': 'http://maps.isric.org/mapserv?map=/map/phh2o.map', + 'bdod': 'http://maps.isric.org/mapserv?map=/map/bdod.map', + 'cec': 'http://maps.isric.org/mapserv?map=/map/cec.map', + 'cfvo': 'https://maps.isric.org/mapserv?map=/map/cfvo.map', + 'clay': 'https://maps.isric.org/mapserv?map=/map/clay.map', + 'nitrogen': 'https://maps.isric.org/mapserv?map=/map/nitrogen.map', + 'sand': 'https://maps.isric.org/mapserv?map=/map/sand.map', + 'silt': 'https://maps.isric.org/mapserv?map=/map/silt.map', + 'soc': 'https://maps.isric.org/mapserv?map=/map/soc.map', +<<<<<<< HEAD +======= + 'ocs': 'https://maps.isric.org/mapserv?map=/map/ocs.map', +>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5 + 'ocd': 'https://maps.isric.org/mapserv?map=/map/ocd.map' + + # Add other WCS URLs here + # ... +} + +# Soil properties +<<<<<<< HEAD +soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd'] +======= +soil_properties = ['phh2o', 'bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'sand', 'silt', 'soc', 'ocd', 'ocs'] +>>>>>>> 67f953ec92d5f3e8523aaa160e6654c867c0ebd5 + +# Depth intervals +depth_intervals = ['0-5cm', '5-15cm', '15-30cm', '30-60cm', '60-100cm', '100-200cm'] + +# A bounding box broadly matching Peru +bbox = (-81.81, -18.80, -68.15, 0.32) + +for property in soil_properties: + wcs_url = wcs_urls.get(property, '') + if wcs_url: + wcs = WebCoverageService(wcs_url, version='1.0.0') + + for depth in depth_intervals: + identifier = f'{property}_{depth}_mean' + file_name = f'./data/{property}_{depth}_mean.tif' + title = f'Mean {property} between {depth} deep in Peru' + + # Fetch the map segment within the bounding box + response = wcs.getCoverage( + identifier=identifier, + crs='urn:ogc:def:crs:EPSG:4326', + bbox=bbox, + resx=0.002, resy=0.002, + format='GEOTIFF_INT16') + + # Fetch the coverage for Peru and save it to disk + with open(file_name, 'wb') as file: + file.write(response.read()) + print(f"download complete: {file_name}") + + # Open the file from disk + ph = rasterio.open(file_name, driver="GTiff") + + # Use the plot class to plot the map + plot.show(ph, title=title, cmap='gist_ncar') + plt.show() + else: + print(f"WCS URL for {property} not defined.") diff --git a/src/scripts/LULCC/preparation/Rasterize_shapefile.R b/src/scripts/LULCC/preparation/Rasterize_shapefile.R new file mode 100644 index 0000000..44602f2 --- /dev/null +++ b/src/scripts/LULCC/preparation/Rasterize_shapefile.R @@ -0,0 +1,66 @@ +######################################################################## +## Script name: Rasterize_shapefile +## Purpose of script: rasterize the shapefils of three geographical regions in Peru +## Author: Chenyu Shen +## Date Created: 2023-10-27 +## Notes: +######################################################################## + +### ========================================================================= +### A - Preparation +### ========================================================================= + +## Install and load packages + +#vector other required packages +packs<-c("terra") + +#install new packages +new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] +if (length(new.packs)) install.packages(new.packs) + +# Load required packages +invisible(lapply(packs, require, character.only = TRUE)) + +# Set working directory +setwd("C:/Work/Peru-env-futures") + +# Read in the shapefile +regions <- vect("src/utils/Geographical_Region/region-geografica.shp") + +# Read in the reference raster +ref_raster <- rast("src/utils/ref_grid.tif") + + +### ========================================================================= +### B - Check and Transform CRS +### ========================================================================= + +# Check CRS of both files +crs_regions <- crs(regions) +crs_ref <- crs(ref_raster) + +# Transform if necessary +if (crs_regions != crs_ref) { + regions <- project(regions, crs_ref) +} + + +### ========================================================================= +### C - Rasterize the shapefile +### ========================================================================= + +# Access and view the attibutes table of the shapefile +attributes <- values(regions) +print(attributes) + +# Rasterize +rasterized_regions <- rasterize(regions, ref_raster, field="nombre", fun="min") + +# Plot rasterized data +plot(rasterized_regions, col=rainbow(3)) + +# Overlay vector data (e.g., boundaries or points) +plot(regions, add=TRUE) + +writeRaster(rasterized_regions, "src/utils/rasterized_regions.tif") diff --git a/src/scripts/LULCC/preparation/Ref_grid_creation.R b/src/scripts/LULCC/preparation/Ref_grid_creation.R index 0540149..3f9b43f 100644 --- a/src/scripts/LULCC/preparation/Ref_grid_creation.R +++ b/src/scripts/LULCC/preparation/Ref_grid_creation.R @@ -1,19 +1,19 @@ ######################################################################## -## Script name: -## Purpose of script: -## Author: +## Script name: Ref_grid_creation +## Purpose of script: Set up uniform spatial grid for project as reference +## Author: Chenyu Shen ## Date Created: 2023-10-13 ## Notes: ######################################################################## ### ========================================================================= -### Preparation +### A - Preparation ### ========================================================================= ## Install and load packages #vector other required packages -packs<-c() +packs<-c("terra","raster") #install new packages new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] @@ -22,5 +22,64 @@ if (length(new.packs)) install.packages(new.packs) # Load required packages invisible(lapply(packs, require, character.only = TRUE)) -# Source custom functions -invisible(sapply(list.files("Scripts/Functions", \ No newline at end of file +# Set working directory +setwd(Data_dir) + +# Load one of the LULC layers +input_raster <- rast("LULC/Copernicus/Peru_Land_Cover_2019.tif") + + + +### ========================================================================= +### B - Extract information and remove unnecessary cells +### ========================================================================= + +# Extract extent, CRS, and resolution information +extent_info <- ext(input_raster) +crs_info <- crs(input_raster) +res_info <- res(input_raster) +nrows <- nrow(input_raster) +ncols <- ncol(input_raster) + + +# Create a reference grid with the same extent, CRS, and resolution +ref_grid <- rast(extent_info, nrows = nrows, ncols = ncols, crs = crs_info, vals = NA) + +# Define the extent of desired region (xmin, xmax, ymin, ymax) +desired_extent <- c(xmin = -81.81, xmax = -68.15, ymin = -18.80, ymax = 0.32) + +# Crop the reference grid to the extent of desired region +cropped_gird <- crop(ref_grid, desired_extent) + +# Save the cropped raster +writeRaster(cropped_gird, "ref_grid.tif", overwrite = TRUE) + + +### ========================================================================= +### C - Calculate the number of pixels within the Peru boundary +### ========================================================================= + +# # Define the number of chunks (adjust this value as needed, more chunks for larger rasters) +# num_chunks <- 100 +# +# # Get the number of rows per chunk +# rows_per_chunk <- ceiling(nrow(input_raster) / num_chunks) +# +# # Initialize the count +# non_zero_pixels <- 0 +# +# # Loop over each chunk +# for (i in 1:num_chunks) { +# # Define the start and end rows for the current chunk +# start_row <- ((i-1) * rows_per_chunk) + 1 +# end_row <- min(i * rows_per_chunk, nrow(input_raster)) +# +# # Retrieve the values for the chunk +# chunk <- input_raster[start_row:end_row, ] +# +# +# # Sum the non-zero values in the chunk and add to the total count +# non_zero_pixels <- non_zero_pixels + sum(chunk != 0, na.rm = TRUE) +# } +# +# print(non_zero_pixels) diff --git a/src/scripts/LULCC/preparation/Write_to_excel.py b/src/scripts/LULCC/preparation/Write_to_excel.py new file mode 100644 index 0000000..82aaad0 --- /dev/null +++ b/src/scripts/LULCC/preparation/Write_to_excel.py @@ -0,0 +1,42 @@ +import os +import pandas as pd + +# Set the directory where your TIF files are located +directory_path = 'C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Soil' +relative_path_root = 'Peru_data_collab/Preds/Raw/Soil' + +# List all TIF files in the directory +tif_files = [f for f in os.listdir(directory_path) if f.endswith('.tif')] + +# Load existing data from the Excel file +excel_path = "C:/Users/chshen/OneDrive - ETH Zurich/Work/predictor_table.xlsx" # Replace with the path to your Excel file +existing_data = pd.read_excel(excel_path) + +# Append each TIF file path as a new row to the DataFrame +for tif_file in tif_files: + # Extract the file name without the .tif extension to use as the 'Variable_name' + variable_name = os.path.splitext(tif_file)[0] + + # Construct the relative file path + relative_file_path = os.path.join(relative_path_root, tif_file) + + # Define the new data entry + new_data_entry = { + 'Variable_name': variable_name, + 'Data_citation': 'Soilgrid', + 'URL': 'https://maps.isric.org/', + 'Original_resolution': '250m', + 'Static_or_dynamic': 'static', # or 'dynamic' based on your data + 'Temporal_coverage': '', + 'Temporal_resolution': '', + 'Prepared': 'No', + 'Raw_data_path': relative_file_path, + # ... add other fields as necessary + } + + # Convert the dictionary to a DataFrame to ensure similar structure + new_data_df = pd.DataFrame([new_data_entry]) + existing_data = pd.concat([existing_data, new_data_df], ignore_index=True) + +# Save the updated DataFrame back to an Excel file +existing_data.to_excel(excel_path, index=False) diff --git a/src/scripts/preparation/Aggregate_LULC.R b/src/scripts/preparation/Aggregate_LULC.R new file mode 100644 index 0000000..b74f7cc --- /dev/null +++ b/src/scripts/preparation/Aggregate_LULC.R @@ -0,0 +1,55 @@ +######################################################################## +## Script name: Aggregate_LULC +## Purpose of script: Aggregate the MapBiomas LULC data to 120m resolution +## Author: Chenyu Shen +## Date Created: 2023-11-20 +## Notes: +######################################################################## + +### ========================================================================= +### A - Preparation +### ========================================================================= + +## Install and load packages + +#vector other required packages +packs<-c("terra","raster","sp") + +#install new packages +new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] +if (length(new.packs)) install.packages(new.packs) + +# Load required packages +invisible(lapply(packs, require, character.only = TRUE)) + +# Set working directory +setwd(Data_dir) + +# Set output directory +output_dir <- "LULC/MapBiomas_120m" + +### ========================================================================= +### B - Aggregate the Data +### ========================================================================= +for (year in 1985:2021) { + # Construct file path for the input raster + input_path <- sprintf("LULC/MapBiomas/peru_coverage_%d.tif", year) + + # Check if the file exists + if (!file.exists(input_path)) { + next # Skip to the next iteration if the file does not exist + } + + # Load the raster + lulc_30m <- raster(input_path) + + # Aggregate to 120m resolution + agg_factor <- 4 # 30m to 120m needs a factor of 4 + lulc_120m <- aggregate(lulc_30m, fact=agg_factor, fun=modal, na.rm=TRUE) + + # Construct file path for the output raster + output_path <- sprintf("%s/peru_coverage_120m_%d.tif", output_dir, year) + + # Save the aggregated raster + writeRaster(lulc_120m, filename=output_path, overwrite=TRUE) +} \ No newline at end of file diff --git a/src/scripts/preparation/Distance_to_water.R b/src/scripts/preparation/Distance_to_water.R new file mode 100644 index 0000000..ec5d7d3 --- /dev/null +++ b/src/scripts/preparation/Distance_to_water.R @@ -0,0 +1,65 @@ +######################################################################## +## Script name: Distance_to_water +## Purpose of script: Calculate the distance from water bodies (navigable rivers, +## small rivers and streams, lakes and lagoons) +## Author: Chenyu Shen +## Date Created: 2023-11-15 +## Notes: +######################################################################## + +### ========================================================================= +### A - Preparation +### ========================================================================= + +## Install and load packages + +#vector other required packages +packs<-c("raster", "sf", "terra") + +#install new packages +new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] +if (length(new.packs)) install.packages(new.packs) + +# Load required packages +invisible(lapply(packs, require, character.only = TRUE)) + +# Set working directory +setwd(Data_dir) + + +# Load the shapefiles of waterbody +navigable_rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp") +small_rivers <- st_read("Preds/Raw/Shapefiles/river_stream/Rios_Quebradas.shp") +lakes_lagoons <- st_read("Preds/Raw/Shapefiles/lake_lagoon/Lagos_lagunas_Project.shp") + +# Load and reproject Peru's boundary to match the CRS of the water body rasters +peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp") +template_raster <- rast("ref_grid.tif") +peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster)) + +### ========================================================================= +### B - Calculate Distance +### ========================================================================= + +# Rasterize the Shapefiles +raster_navigable_rivers <- rasterize(navigable_rivers, template_raster,field=1) +raster_small_rivers <- rasterize(small_rivers, template_raster,field=1) +raster_lakes_lagoons <- rasterize(lakes_lagoons, template_raster,field=1) +# field=1 is used to mark the presence of the feature. +# It's a placeholder to indicate where the features are in the raster grid + +# Clip the rasters to Peru's boundary +raster_navigable_rivers_clipped <- mask(raster_navigable_rivers, peru_boundary_transformed) +raster_small_rivers_clipped <- mask(raster_small_rivers, peru_boundary_transformed) +raster_lakes_lagoons_clipped <- mask(raster_lakes_lagoons, peru_boundary_transformed) + +# Calculate Distance within Peru's boundary +distance_navigable_rivers <- terra::distance(raster_navigable_rivers_clipped) +distance_small_rivers <- terra::distance(raster_small_rivers_clipped) +distance_lakes_lagoons <- terra::distance(raster_lakes_lagoons_clipped) + + +# Write the output as Tiff files +writeRaster(distance_navigable_rivers, "distance_navigable_rivers.tif") +writeRaster(distance_small_rivers, "distance_small_rivers.tif") +writeRaster(distance_lakes_lagoons, "distance_lakes_lagoons.tif") diff --git a/src/scripts/preparation/Plot_LULC.R b/src/scripts/preparation/Plot_LULC.R new file mode 100644 index 0000000..9952217 --- /dev/null +++ b/src/scripts/preparation/Plot_LULC.R @@ -0,0 +1,100 @@ +######################################################################## +## Script name: Plot_LULC +## Purpose of script:Plot LULC data +## Author: Chenyu Shen +## Date Created: 2023-10-23 +## Notes: +######################################################################## + +### ========================================================================= +### A - Preparation +### ========================================================================= + +## Install and load packages + +#vector other required packages +packs<-c("terra","raster") + +#install new packages +new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] +if (length(new.packs)) install.packages(new.packs) + +# Load required packages +invisible(lapply(packs, require, character.only = TRUE)) + +# Set working directory +setwd(Data_dir) + +# Load all the LULC files +years <- 1985:1990 +LULC_list <- list() + +for(year in years){ + path <- paste0("LULC/peru_coverage_",year,".tif") + LULC_list[[as.character(year)]] <- rast(path) +} + + + +my_colors <- c( + "3" = "#006400", + "4" = "#32CD32", + "5" = "#687537", + "6" = "#76A5AF", + "9" = "#935132", + "11" = "#45C2A5", + "12" = "#B8AF4F", + "13" = "#F1C232", + "15" = "#FFD966", + "18" = "#E974ED", + "21" = "#FFEFC3", + "24" = "#AA0000", + "30" = "#FF0000", + "25" = "#FF8585", + "33" = "#0000FF", + "34" = "#4FD3FF", + "27" = "#D5D5E5" +) + +labels <- c( + "3" = "Forest", + "4" = "Dry forest", + "5" = "Mangrove", + "6" = "Flooded forest", + "9" = "Forest plantation", + "11" = "Wetland", + "12" = "Grasslands/herbaceous", + "13" = "Scrubland and other non forest formations", + "15" = "Pasture", + "18" = "Agriculture", + "21" = "Agricultural mosaic", + "24" = "Infrastructure", + "30" = "Mining", + "25" = "Other non vegetated area", + "33" = "River,lake and ocean", + "34" = "Glacier", + "27" = "Not observed" +) + +land_cover_map <- LULC_list[["1985"]] + +# Get unique values using freq +unique_vals <- freq(land_cover_map)[, 2] + +# Match unique values with the predefined colors and labels +plot_colors <- my_colors[as.character(unique_vals)] +plot_labels <- labels[as.character(unique_vals)] + + + +# 设置布局为1行2列,主要为了绘制地图和图例 +layout(matrix(1:2, nrow=1), widths=c(4, 1)) + +# 在第一个区域绘制土地利用图 +par(mar=c(5, 5, 4, 2)) # 调整边距 +plot(land_cover_map, col=plot_colors, main=paste("Land Cover Map", year), axes=TRUE, legend=FALSE) + +# 在第二个区域绘制图例 +par(mar=c(5, 2, 4, 2)) # 调整边距 +plot.new() +legend("center", legend=plot_labels, fill=plot_colors, title="Land Cover Types", cex=0.5, bty="n") diff --git a/src/scripts/preparation/Python/.idea/.gitignore b/src/scripts/preparation/Python/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/src/scripts/preparation/Python/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/src/scripts/preparation/Python/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/src/scripts/preparation/Python/.idea/misc.xml b/src/scripts/preparation/Python/.idea/misc.xml new file mode 100644 index 0000000..9de2865 --- /dev/null +++ b/src/scripts/preparation/Python/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/src/scripts/preparation/Python/.idea/modules.xml b/src/scripts/preparation/Python/.idea/modules.xml new file mode 100644 index 0000000..2a08ed2 --- /dev/null +++ b/src/scripts/preparation/Python/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/src/scripts/preparation/Python/.idea/preparation.iml b/src/scripts/preparation/Python/.idea/preparation.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/src/scripts/preparation/Python/.idea/preparation.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/src/scripts/preparation/Python/.idea/vcs.xml b/src/scripts/preparation/Python/.idea/vcs.xml new file mode 100644 index 0000000..77a3cc7 --- /dev/null +++ b/src/scripts/preparation/Python/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg new file mode 100644 index 0000000..cd89cb9 --- /dev/null +++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.cpg @@ -0,0 +1 @@ +ISO-8859-1 \ No newline at end of file diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf new file mode 100644 index 0000000..6c144a5 Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.dbf differ diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.prj b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj new file mode 100644 index 0000000..e9ae3f0 --- /dev/null +++ b/src/scripts/preparation/Python/Data/reprojected_lagoons.prj @@ -0,0 +1 @@ +PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shp b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp new file mode 100644 index 0000000..343bbce Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shp differ diff --git a/src/scripts/preparation/Python/Data/reprojected_lagoons.shx b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx new file mode 100644 index 0000000..59d8d95 Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_lagoons.shx differ diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.cpg b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg new file mode 100644 index 0000000..cd89cb9 --- /dev/null +++ b/src/scripts/preparation/Python/Data/reprojected_rivers.cpg @@ -0,0 +1 @@ +ISO-8859-1 \ No newline at end of file diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.dbf b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf new file mode 100644 index 0000000..aab7647 Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.dbf differ diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.prj b/src/scripts/preparation/Python/Data/reprojected_rivers.prj new file mode 100644 index 0000000..e9ae3f0 --- /dev/null +++ b/src/scripts/preparation/Python/Data/reprojected_rivers.prj @@ -0,0 +1 @@ +PROJCS["Peru_Central_Zone",GEOGCS["GCS_Provisional_S_American_1956",DATUM["D_Provisional_S_American_1956",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",720000.0],PARAMETER["False_Northing",1039979.159],PARAMETER["Central_Meridian",-76.0],PARAMETER["Scale_Factor",0.99932994],PARAMETER["Latitude_Of_Origin",-9.5],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shp b/src/scripts/preparation/Python/Data/reprojected_rivers.shp new file mode 100644 index 0000000..949be16 Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shp differ diff --git a/src/scripts/preparation/Python/Data/reprojected_rivers.shx b/src/scripts/preparation/Python/Data/reprojected_rivers.shx new file mode 100644 index 0000000..e828388 Binary files /dev/null and b/src/scripts/preparation/Python/Data/reprojected_rivers.shx differ diff --git a/src/scripts/preparation/Python/DistanceToWater.py b/src/scripts/preparation/Python/DistanceToWater.py new file mode 100644 index 0000000..712056a --- /dev/null +++ b/src/scripts/preparation/Python/DistanceToWater.py @@ -0,0 +1,72 @@ +import geopandas as gpd +import rasterio +from rasterio.features import rasterize +from scipy.ndimage import distance_transform_edt +import numpy as np +from rasterio.mask import mask + +def calculate_distance_to_river(river_shapefile, template_raster_path, meta): + rivers_gdf = gpd.read_file(river_shapefile) + with rasterio.open(template_raster_path) as src: + river_raster = rasterize( + [(geometry, 1) for geometry in rivers_gdf.geometry], + out_shape=(src.height, src.width), + transform=src.transform, + fill=0, + all_touched=True, + dtype='float32' + ) + + # Compute Distance to Nearest River + inverse_river_raster = np.where(river_raster == 1, 0, 1) + distance = distance_transform_edt(inverse_river_raster) * src.res[0] + + return distance + +# Paths to the reprojected river shapefiles +river_shapefile = './Data/reprojected_rivers.shp' +lagoon_shapefile = './Data/reprojected_lagoons.shp' +template_raster_path = './Data/reprojected_raster.tif' + +# Extract metadata from the template raster +with rasterio.open(template_raster_path) as src: + meta = src.meta.copy() + +# Calculate distances +distance_river = calculate_distance_to_river(river_shapefile, template_raster_path, meta) +distance_lagoon = calculate_distance_to_river(lagoon_shapefile, template_raster_path, meta) + +# Path Peru boundary shapefile +peru_boundary_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Peru_admin_boud/per_admbnda_adm0_ign_20200714.shp") +utm_crs = 'EPSG:24892' +peru_boundary_utm = peru_boundary_gdf.to_crs(utm_crs) + +# Clip the distance rater to the boundary Peru +def clip_raster_with_shapefile(raster_path, shapefile_path, output_raster_path): + # Load the boundary shapefile + boundary_gdf = gpd.read_file(shapefile_path) + + # Load the raster to be clipped + with rasterio.open(raster_path) as src: + out_image, out_transform = mask(src, boundary_gdf.geometry, crop=True) + out_meta = src.meta.copy() + + # Update the metadata to reflect the new raster dimensions + out_meta.update({"driver": "GTiff", + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform}) + + # Save the clipped raster + with rasterio.open(output_raster_path, 'w', **out_meta) as dest: + dest.write(out_image) + +# File paths +clipped_river_raster = 'clipped_river_raster.tif' +clipped_lagoon_raster = 'clipped_lagoon_raster.tif' + +# Clip the rasters +clip_raster_with_shapefile(distance_river, peru_boundary_utm, clipped_river_raster) +clip_raster_with_shapefile(distance_lagoon, peru_boundary_utm, clipped_lagoon_raster) + + diff --git a/src/scripts/preparation/Python/Get_SoilGrids.py b/src/scripts/preparation/Python/Get_SoilGrids.py new file mode 100644 index 0000000..c01638c --- /dev/null +++ b/src/scripts/preparation/Python/Get_SoilGrids.py @@ -0,0 +1,48 @@ + +######################################################################## +## Script name: Distance_to_water +## Purpose of script: Calculate the distance from water bodies (navigable rivers, +## small rivers and streams, lakes and lagoons) +## Author: Chenyu Shen +## Date Created: 2023-11-15 +## Notes: +######################################################################## + + +from owslib.wcs import WebCoverageService +import rasterio +from rasterio import plot +maps=['https://maps.isric.org/mapserv?map=/map/ocs.map','https://maps.isric.org/mapserv?map=/map/sand.map'] +identifiers=['ocs_0-30cm_mean','sand_0-30cm_mean'] +file_names=['./data/Peru_soil_carbon_stock_0-5_mean.tif','./data/Peru_sand_carbon_stock_0-5_mean.tif'] +for i in range(len(maps)): + + map=maps[i] + identifier=identifiers[i] + file_name=file_names[i] + + print(f"start download: {file_name}" ) + wcs = WebCoverageService(map, version='1.0.0') + + # A bounding box broadly matching Senegal: + bbox = (-81.81, -18.80, -68.15, 0.32) + + # The getCoverage method can now be used to fetch the map segment within the bounding box. Note the other parameters, Section 1 showed how to obtain them. + response = wcs.getCoverage( + identifier=identifier, + crs='urn:ogc:def:crs:EPSG:4326', + bbox=bbox, + resx=0.002, resy=0.002, + format='GEOTIFF_INT16') + + # create a .tif file and save data into this file + with open(file_name, 'wb') as file: + file.write(response.read()) + print(f"download complete: {file_name}" ) + print("="*80) + + # # read map data from the created file + # ph = rasterio.open("./data/Peru_soil_carbon_stock_0-5_mean.tif", driver="GTiff") + # + # # display the map + # plot.show(ph, title='Mean pH between 0 and 5 cm deep in Senegal', cmap='gist_ncar') \ No newline at end of file diff --git a/src/scripts/preparation/Python/Re-projection.py b/src/scripts/preparation/Python/Re-projection.py new file mode 100644 index 0000000..80426fc --- /dev/null +++ b/src/scripts/preparation/Python/Re-projection.py @@ -0,0 +1,44 @@ +import numpy as np +import geopandas as gpd +import rasterio +from rasterio.warp import calculate_default_transform, reproject, Resampling + +# Load and Reproject the River Shapefile +rivers_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Rios/Rios.shp") +lagoons_gdf = gpd.read_file("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/Preds/Raw/Water/Lagunas/Lagunas.shp") +# Determine the appropriate UTM zone for Peru and reproject +utm_crs = 'EPSG:24892' # South American Datum 1969 (SAD69) / Peru Central zone +rivers_gdf_utm = rivers_gdf.to_crs(utm_crs) +lagoons_gdf_utm = lagoons_gdf.to_crs(utm_crs) + +# Save the Reprojected River Shapefile +rivers_gdf_utm.to_file('./reprojected_rivers.shp') +lagoons_gdf_utm.to_file('./reprojected_lagoons.shp') + +# Load the Template Raster +with rasterio.open("C:/Users/chshen/OneDrive - ETH Zurich/Work/Peru_data_collab/ref_grid.tif") as src: + transform, width, height = calculate_default_transform( + src.crs, utm_crs, src.width, src.height, *src.bounds) + meta = src.meta.copy() + meta.update({ + 'crs': utm_crs, + 'transform': transform, + 'width': width, + 'height': height + }) + + # Reproject the Template Raster + template = np.full((height, width), np.nan, dtype=rasterio.float32) + reproject( + source=rasterio.band(src, 1), + destination=template, + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=utm_crs, + resampling=Resampling.nearest) + + +# Save the Reprojected Raster +with rasterio.open('./reprojected_raster.tif', 'w', **meta) as dst: + dst.write(template, 1) diff --git a/src/scripts/preparation/Python/updated_data_processing_script.py b/src/scripts/preparation/Python/updated_data_processing_script.py new file mode 100644 index 0000000..aea7a3d --- /dev/null +++ b/src/scripts/preparation/Python/updated_data_processing_script.py @@ -0,0 +1,32 @@ + +import pandas as pd +import os + +def process_file(file_path): + # Extracting a prefix from the file name (assuming file name format is like 'Copper_reserves.xlsx') + file_name = os.path.basename(file_path) + prefix = ''.join([word[0] for word in file_name.split('_')]).lower() + + # Load the Excel file + df = pd.read_excel(file_path, sheet_name=0, skiprows=4) + + # Rename the first column to 'DEPARTMENT' for clarity + df.rename(columns={df.columns[0]: 'DEPARTMENT'}, inplace=True) + + # Drop rows at the end that are part of notes and sources if any + df = df.dropna(subset=['DEPARTMENT']) + + # Replace commas with dots and remove dashes + df.replace({',': '.', '-': ''}, regex=True, inplace=True) + + # Abbreviate column names from the second column onwards + columns = df.columns.tolist() + abbreviated_columns = [columns[0]] + [f'{prefix}_{col}' for col in columns[1:]] + df.columns = abbreviated_columns + + # Save the updated dataframe to CSV + output_csv_file_path = file_path.replace('.xlsx', '_processed.csv') + df.to_csv(output_csv_file_path, index=False) + +# Example usage +process_file('path_to_your_excel_file.xlsx') diff --git a/src/scripts/preparation/convert_stat_to_tif.R b/src/scripts/preparation/convert_stat_to_tif.R new file mode 100644 index 0000000..0df5ffb --- /dev/null +++ b/src/scripts/preparation/convert_stat_to_tif.R @@ -0,0 +1,50 @@ +######################################################################## +## Script name: convert_stat_to_tif +## Purpose of script: Convert Statistics from INEI to raster files +## Author: Chenyu Shen +## Date Created: 2023-12-04 +## Notes: +######################################################################## + +### ========================================================================= +### A - Preparation +### ========================================================================= + +## Install and load packages + +#vector other required packages +packs<-c("sf", "terra", "dplyr") + +#install new packages +new.packs <- packs[!(packs %in% installed.packages()[, "Package"])] +if (length(new.packs)) install.packages(new.packs) + +# Load required packages +invisible(lapply(packs, require, character.only = TRUE)) + +# Set working directory +setwd(Data_dir) + +# Load the shapefile of departments in Peru +peru_departments <- st_read("Preds/Raw/Shapefiles/Departamento/LIM_DEPARTAMENTO.shp") + +# Load the population data (assuming it's a CSV file; adjust as needed) +population_data <- read.csv("Population.csv") + +# Load the reference raster +template_raster <- rast("utils/ref_grid.tif") + + +### ========================================================================= +### B - Assign Population values as raster values +### ========================================================================= + +# Merge the shapefile with the population data +# Ensure that both datasets have a common column for merging (e.g., department name) +merged_data <- merge(peru_departments, population_data, by = "NOMBDEP") + +# Rasterize the population data +population_raster <- terra::rasterize(merged_data, template_raster, "X2017", fun = sum) + +# Export as TIFF +terra::writeRaster(population_raster, "population_map.tif", overwrite = TRUE) diff --git a/src/scripts/preparation/smaller_extent.R b/src/scripts/preparation/smaller_extent.R new file mode 100644 index 0000000..f435a13 --- /dev/null +++ b/src/scripts/preparation/smaller_extent.R @@ -0,0 +1,79 @@ +library(terra) +library(sf) + +# Set working directory +setwd(Data_dir) + +# Load the river shapefile with sf +rivers <- st_read("Preds/Raw/Shapefiles/navigable_river/Rio_navegables.shp") + +# Convert sf object to SpatVector object +rivers_vect <- vect(rivers) + +# Load the reference raster +template_raster <- rast("ref_grid.tif") + +# Rasterize the river shapefile +# Create a raster where river cells are 1 and others are NA +rivers_rasterized <- rasterize(rivers_vect, template_raster, field=1) + +# Load and reproject Peru's boundary to match the CRS of the water body rasters +peru_boundary <- st_read("Preds/Raw/Shapefiles/Country/nivel-politico-1.shp") +peru_boundary_transformed <- st_transform(peru_boundary, crs = crs(template_raster)) + +# Clip the rasters to Peru's boundary +rivers_clipped <- mask(rivers_rasterized, peru_boundary_transformed) + +# Define a smaller extent +# obtain the extent of the original raster +rows <- nrow(rivers_rasterized) +cols <- ncol(rivers_rasterized) + +# Calculate the center coordinates of the raster +center_row <- rows / 2 +center_col <- cols / 2 + +# Calculate the start and end rows and columns for cropping +start_row <- center_row - 2128 / 2 +end_row <- center_row + 2128 / 2 +start_col <- center_col - 1520 / 2 +end_col <- center_col + 1520 / 2 + +# Convert row and column indices to coordinates +start_x <- xFromCol(rivers_rasterized, start_col) +end_x <- xFromCol(rivers_rasterized, end_col) +start_y <- yFromRow(rivers_rasterized, end_row) +end_y <- yFromRow(rivers_rasterized, start_row) + +# Create the extent +small_extent <- terra::ext(start_x, end_x, start_y, end_y) + +# Crop the raster +small_raster <- crop(rivers_rasterized, small_extent) + +# Calculate the distance from each cell in the template raster to the nearest river +distance_to_river <- distance(small_raster) + +# Basic plot with terra +plot(distance_to_river, + main="Distance to River", + col=viridis::viridis(100), + xlab="Longitude", ylab="Latitude") + +# Advanced version using ggplot + +library(ggplot2) +library(viridis) + +# Convert raster to a data frame for ggplot +raster_df <- as.data.frame(distance_to_river, xy=TRUE) + +# Use ggplot to plot +ggplot(raster_df, aes(x = x, y = y, fill = layer)) + + geom_tile() + + scale_fill_viridis(name="Distance to River", option="D") + + labs(title="Distance to River", x="Longitude", y="Latitude") + + coord_fixed() + +# Write the output as Tiff files +writeRaster(distance_to_river, "distance_to_river.tif") diff --git a/src/utils/Geographical_Region/region-geografica.dbf b/src/utils/Geographical_Region/region-geografica.dbf new file mode 100644 index 0000000..4cbc0d7 Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.dbf differ diff --git a/src/utils/Geographical_Region/region-geografica.prj b/src/utils/Geographical_Region/region-geografica.prj new file mode 100644 index 0000000..3867261 --- /dev/null +++ b/src/utils/Geographical_Region/region-geografica.prj @@ -0,0 +1 @@ +GEOGCS["GCS_SIRGAS_2000",DATUM["D_SIRGAS_2000",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/src/utils/Geographical_Region/region-geografica.shp b/src/utils/Geographical_Region/region-geografica.shp new file mode 100644 index 0000000..392a3e4 Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shp differ diff --git a/src/utils/Geographical_Region/region-geografica.shx b/src/utils/Geographical_Region/region-geografica.shx new file mode 100644 index 0000000..42717aa Binary files /dev/null and b/src/utils/Geographical_Region/region-geografica.shx differ diff --git a/src/utils/predictor_table.xlsx b/src/utils/predictor_table.xlsx index e84a446..7e49921 100644 Binary files a/src/utils/predictor_table.xlsx and b/src/utils/predictor_table.xlsx differ