diff --git a/OpenMRG/README.md b/OpenMRG/README.md new file mode 100644 index 0000000..be6ad21 --- /dev/null +++ b/OpenMRG/README.md @@ -0,0 +1,8 @@ +# OpenMRG + +## 5-minute data with a duration of 2.5 hours + +This data was copied from the `mergeplg` example data, where it can later be deleted, +and the files have been renamed to reflect the fact that all data was aggregated +to a 5-minute temporal resolution. The notebook that was used to produce the files +was also just copied. It was not run again. diff --git a/OpenMRG/notebooks/openMRG_create_from_Erlend.ipynb b/OpenMRG/notebooks/openMRG_create_from_Erlend.ipynb new file mode 100644 index 0000000..95624d4 --- /dev/null +++ b/OpenMRG/notebooks/openMRG_create_from_Erlend.ipynb @@ -0,0 +1,697 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "88b8bc90-55a0-4a41-9926-58ecf8204586", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-02 11:33:06.828569: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pycomlink as pycml\n", + "import tqdm\n", + "import xarray as xr" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6cb5a332-7482-4c50-9438-2643b34485a0", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# Navigate to your local sandbox clone\n", + "path_transformer = str(\n", + " Path(\"/home/erlend/Documents/GitHub/OPENSENSE_sandbox/notebooks/\").resolve()\n", + ")\n", + "sys.path.append(path_transformer)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e8d299ab-812e-4528-915d-97e9957f4cf4", + "metadata": {}, + "outputs": [], + "source": [ + "import opensense_data_downloader_and_transformer as oddt" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "25a0dbb8-2344-46a3-acdc-7c0d1efb9ee4", + "metadata": {}, + "outputs": [], + "source": [ + "# User specified starting times\n", + "start = \"2015-07-25T12:30\"\n", + "end = \"2015-07-25T15:00\"\n", + "\n", + "# local path to OpenMRG data (will be created if it does not exist)\n", + "local_path = \"/home/erlend/offline_data/andersson_2022_OpenMRG/\"" + ] + }, + { + "cell_type": "markdown", + "id": "065b8baf-61ad-4697-828e-d23a5ff8f7b2", + "metadata": {}, + "source": [ + "# Note:\n", + "The following code creates example data used for testing mergeplg. It requires pycomlink to run. " + ] + }, + { + "cell_type": "markdown", + "id": "2d23a2b4-a08f-4094-adc0-3bdc8f9bf40c", + "metadata": { + "tags": [] + }, + "source": [ + "# Derive small example dataset from the OpenMRG dataset from SMHI with large CML dataset\n", + "source: https://zenodo.org/record/6673751" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8dfbf1e0-72be-44c2-8e98-07e6e7b44fac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File already exists at desired location /home/erlend/offline_data/andersson_2022_OpenMRG/OpenMRG.zip\n", + "Not downloading!\n" + ] + } + ], + "source": [ + "oddt.download_andersson_2022_OpenMRG(local_path=local_path, print_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5e0d4ba5-580a-4217-9252-26dbf1e00e24", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erlend/Documents/GitHub/OPENSENSE_sandbox/notebooks/opensense_data_downloader_and_transformer.py:302: FutureWarning: the `pandas.MultiIndex` object(s) passed as 'sublink' coordinate(s) or data variable(s) will no longer be implicitly promoted and wrapped into multiple indexed coordinates in the future (i.e., one coordinate for each multi-index level + one dimension coordinate). If you want to keep this behavior, you need to first wrap it explicitly using `mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, `dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`.\n", + " ds_multindex = ds.assign_coords({'sublink':df_metadata.index})\n" + ] + } + ], + "source": [ + "# Transform first part of the data\n", + "ds1 = oddt.transform_andersson_2022_OpenMRG(\n", + " fn=local_path + \"OpenMRG.zip\", # navigate to your local sandbox clone\n", + " path_to_extract_to=local_path,\n", + " time_start_end=(\n", + " None,\n", + " \"2015-07-15T00:00\",\n", + " ), # default (None, None) -> no timeslicing. ie. ('2015-08-31T00', None),\n", + " restructure_data=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "12e61732-0c96-43ca-aaa5-74ef8a2fb07f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/erlend/Documents/GitHub/OPENSENSE_sandbox/notebooks/opensense_data_downloader_and_transformer.py:302: FutureWarning: the `pandas.MultiIndex` object(s) passed as 'sublink' coordinate(s) or data variable(s) will no longer be implicitly promoted and wrapped into multiple indexed coordinates in the future (i.e., one coordinate for each multi-index level + one dimension coordinate). If you want to keep this behavior, you need to first wrap it explicitly using `mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, `dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`.\n", + " ds_multindex = ds.assign_coords({'sublink':df_metadata.index})\n" + ] + } + ], + "source": [ + "# Transform second part of the data\n", + "ds2 = oddt.transform_andersson_2022_OpenMRG(\n", + " fn=local_path + \"OpenMRG.zip\", # navigate to your local sandbox clone\n", + " path_to_extract_to=local_path,\n", + " time_start_end=(\n", + " \"2015-07-15T00:00\",\n", + " None,\n", + " ), # default (None, None) -> no timeslicing. ie. ('2015-08-31T00', None),\n", + " restructure_data=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "698fb4ea-6480-44a6-9d29-cbfe4e8d7804", + "metadata": {}, + "outputs": [], + "source": [ + "# Potentially dampen largest overestimations due to noise.\n", + "ds1 = ds1.resample(time=\"1min\").first(skipna=True)\n", + "ds2 = ds2.resample(time=\"1min\").first(skipna=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ce356246-3223-49c3-af5e-9006c0fd349b", + "metadata": {}, + "outputs": [], + "source": [ + "# concat and drop overlaying duplicate\n", + "ds_cml = xr.concat([ds1, ds2], dim=\"time\").drop_duplicates(dim=\"time\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6e42a9ad-c148-4d12-b627-93ba0fb3d480", + "metadata": {}, + "outputs": [], + "source": [ + "ds_cml[\"tsl\"] = ds_cml.tsl.interpolate_na(dim=\"time\", method=\"linear\", max_gap=\"5min\")\n", + "ds_cml[\"rsl\"] = ds_cml.rsl.interpolate_na(dim=\"time\", method=\"linear\", max_gap=\"5min\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f11f3ae6-cd93-488e-945c-87dcc89f3988", + "metadata": {}, + "outputs": [], + "source": [ + "ds_cml.attrs[\"file author(s)\"] = \"Maximilian Graf, Erlend Øydvin and Christian Chwala\"\n", + "ds_cml.attrs[\"title\"] = \"Transformed and resampled OpenMRG-CML\"\n", + "ds_cml.attrs[\"comment\"] += (\n", + " \"\\n\\nTransformed and resampled dataset: \\n\"\n", + " \"rsl and tsl was resampled to 1 minute resolution using the first occurring\"\n", + " \"value in every minute. \"\n", + " \"Gaps shorter than 5min was linearly interpolated. \"\n", + ")\n", + "ds_cml.attrs[\"contact\"] += \", erlend.oydvin@nmbu.no\"" + ] + }, + { + "cell_type": "markdown", + "id": "f893f815-c4cc-46b6-acf9-45ef20d0f0fe", + "metadata": {}, + "source": [ + "# CML data" + ] + }, + { + "cell_type": "markdown", + "id": "ec6a4dcf-5ce3-4c03-99f5-ae2a7616ea92", + "metadata": {}, + "source": [ + "### CML quality control" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9500f7dc-c180-4dbf-b736-0c5e961c670c", + "metadata": {}, + "outputs": [], + "source": [ + "# calculate total loss\n", + "ds_cml[\"tl\"] = ds_cml.tsl - ds_cml.rsl" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6ae4c870-5ef1-4465-a5c1-0bcf3b9f63e2", + "metadata": {}, + "outputs": [], + "source": [ + "# remove cmls with strong diurnal cycles\n", + "keep = np.where(\n", + " (\n", + " (ds_cml.tl.rolling(time=60 * 5, center=True).std() > 2).mean(dim=\"time\") <= 0.1\n", + " ).all(dim=\"sublink_id\")\n", + ")[0]\n", + "ds_cml = ds_cml.isel(cml_id=keep)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d7341aed-f638-4ec5-96d3-d707827a85d3", + "metadata": {}, + "outputs": [], + "source": [ + "# remove cmls with very noisy periods\n", + "keep = np.where(\n", + " (\n", + " (ds_cml.tl.rolling(time=60, center=True).std() > 0.8).mean(dim=\"time\") <= 0.35\n", + " ).all(dim=\"sublink_id\")\n", + ")[0]\n", + "ds_cml = ds_cml.isel(cml_id=keep)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d904b3a1-5936-4828-b1f6-bebe9b050475", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 11%| | 39/359/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + " 36%|▎| 129/35/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + " 39%|▍| 139/35/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + " 74%|▋| 264/35/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + " 76%|▊| 272/35/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + " 82%|▊| 294/35/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1545: RuntimeWarning: All-NaN slice encountered\n", + " return _nanquantile_unchecked(\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "/home/erlend/miniforge3/envs/pycomlink-dev/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", + " var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,\n", + "100%|█| 359/35\n" + ] + } + ], + "source": [ + "# Keep CMLs where the variability of the wet periods is much larger than\n", + "# for dry periods (experimental!)\n", + "keep = []\n", + "for cml_id in tqdm.tqdm(range(ds_cml.cml_id.size)):\n", + " # fig, ax = plt.subplots(3, 1, figsize = (10, 5))\n", + " tl = ds_cml.isel(cml_id=cml_id, sublink_id=0).tl\n", + "\n", + " # subtract daily trend to avoid removing data with trend\n", + " tl = tl - tl.rolling(time=60 * 24, min_periods=1, center=True).median()\n", + " # tl.plot(ax = ax[0])\n", + "\n", + " tl_diff = ds_cml.isel(cml_id=cml_id, sublink_id=0).tl.diff(dim=\"time\", n=1)\n", + " roll_std_diff = tl_diff.rolling(time=60, min_periods=1, center=True).std()\n", + " roll_std_diff_scaled = roll_std_diff / (2 * roll_std_diff.quantile(0.8, dim=\"time\"))\n", + " wet = roll_std_diff_scaled > 1\n", + "\n", + " std_wet = np.nanstd(tl.data[:-1][wet])\n", + " std_dry = np.nanstd(tl.data[:-1][~wet])\n", + "\n", + " if (std_wet / std_dry) > 4:\n", + " keep.append(cml_id)\n", + "\n", + " # (tl_wet_std/tl_dry_std).plot(ax = ax[2])\n", + " # plt.show()\n", + "keep = np.array(keep)" + ] + }, + { + "cell_type": "markdown", + "id": "cd48811e-0869-43d4-8ed6-c32eb44d5610", + "metadata": {}, + "source": [ + "### CML rain rates" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d4c01371-0069-4575-9ff1-8d0e2866619e", + "metadata": {}, + "outputs": [], + "source": [ + "ds_cml = ds_cml.isel(cml_id=keep)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c679cff3-f6dc-4f01-909d-459a07167203", + "metadata": {}, + "outputs": [], + "source": [ + "# # Calculate wet periods\n", + "roll_std_dev = ds_cml.tl.rolling(time=60, center=True).std()\n", + "threshold = 1.12 * roll_std_dev.quantile(0.8, dim=\"time\")\n", + "ds_cml[\"wet_std\"] = roll_std_dev > threshold\n", + "\n", + "ds_cml[\"baseline\"] = pycml.processing.baseline.baseline_constant(\n", + " trsl=ds_cml.tl,\n", + " wet=ds_cml.wet_std,\n", + " n_average_last_dry=5,\n", + ")\n", + "\n", + "# ds_cml['wet_cnn'] = (('cml_id', 'time'), np.zeros(\n", + "# [ds_cml.cml_id.size, ds_cml.time.size]\n", + "# ).astype(bool))\n", + "# for cml_id in tqdm.tqdm(ds_cml.cml_id.values):\n", + "# cnn_out = cnn.cnn_wet_dry(\n", + "# trsl_channel_1 = ds_cml.sel(cml_id = cml_id).isel(sublink_id = 0).tl.values,\n", + "# trsl_channel_2 = ds_cml.sel(cml_id = cml_id).isel(sublink_id = 1).tl.values,\n", + "# )\n", + "# ds_cml['wet_cnn'].loc[{'cml_id':cml_id}]= cnn_out > 0.82\n", + "# del cnn_out\n", + "# gc.collect()\n", + "# # estimate the baseline during rain events\n", + "# ds_cml['tl_nan'] = xr.where(ds_cml.wet_cnn, np.nan, ds_cml.tl)\n", + "# ds_cml['baseline'] = xr.where(\n", + "# ds_cml.wet_cnn,\n", + "# ds_cml.tl_nan.rolling(time = 60*12, min_periods = 60).median(),\n", + "# ds_cml.tl\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d522b9cb-d54d-45ca-957b-8ea64cb47f5c", + "metadata": {}, + "outputs": [], + "source": [ + "ds_cml[\"A_obs\"] = ds_cml.tl - ds_cml.baseline\n", + "ds_cml[\"A_obs\"] = ds_cml.A_obs.where(ds_cml.A_obs >= 0, 0)\n", + "\n", + "# Pastorek using parameters that looks good for the German,\n", + "# Swedish and Norwegian dataset\n", + "ds_cml[\"waa\"] = pycml.processing.wet_antenna.waa_pastorek_2021_from_A_obs(\n", + " A_obs=ds_cml.A_obs,\n", + " f_Hz=ds_cml.frequency * 1e6,\n", + " pol=ds_cml.polarization.data,\n", + " L_km=ds_cml.length / 1000,\n", + " A_max=6,\n", + " zeta=0.7, # 0.55 is default\n", + " d=0.15,\n", + ")\n", + "\n", + "# calculate attenuation caused by rain and remove negative attenuation\n", + "ds_cml[\"A\"] = ds_cml.tl - ds_cml.baseline - ds_cml.waa\n", + "ds_cml[\"A\"].data[ds_cml.A < 0] = 0\n", + "# derive rain rate via the k-R relation\n", + "ds_cml[\"R\"] = pycml.processing.k_R_relation.calc_R_from_A(\n", + " A=ds_cml.A,\n", + " L_km=ds_cml.length.astype(float) / 1000, # convert to km\n", + " f_GHz=ds_cml.frequency / 1000, # convert to GHz\n", + " pol=ds_cml.polarization,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ddd43e2e-94e1-43ae-8e75-76adaf88c183", + "metadata": {}, + "outputs": [], + "source": [ + "# Slice and convert to sum 5 min\n", + "ds_cml_res = (\n", + " ds_cml[[\"R\"]]\n", + " .isel(sublink_id=0)\n", + " .sel(time=slice(start, end))\n", + " .resample(time=\"5min\")\n", + " .sum(skipna=True)\n", + " / 60\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "fea86f62-9776-463a-b4e4-947853f7e41d", + "metadata": {}, + "outputs": [], + "source": [ + "ds_cml_res.to_netcdf(\"./openmrg_cml.nc\")" + ] + }, + { + "cell_type": "markdown", + "id": "7f68d397-9f80-43e0-8441-cf7ab3d12664", + "metadata": {}, + "source": [ + "# Radar data" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "7e95a260-1bb1-4aa1-a1a0-150d93497b2c", + "metadata": {}, + "outputs": [], + "source": [ + "# read radar data and convert to Opensense naming conventions\n", + "ds_rad = (\n", + " xr.open_dataset(local_path + \"radar/radar.nc\")\n", + " .rename( # create using notebook in data folder\n", + " {\"lat\": \"latitudes\", \"lon\": \"longitudes\"}\n", + " )\n", + " .sel(time=slice(start, end))\n", + " .transpose(\"time\", \"y\", \"x\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "10ff57e3-f7eb-406c-959d-7bffe56aae76", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply masrhal palmer to get rainfall rates\n", + "ds_rad[\"rainfall_amount\"] = (10 ** (ds_rad.data / 10) / 200) ** (5 / 8)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "7cfea2bb-b3d9-431e-9695-ca1f11177d52", + "metadata": {}, + "outputs": [], + "source": [ + "# flip along y axis to work in the grid intersection function\n", + "ds_rad[\"latitudes\"] = ((\"y\", \"x\"), np.flip(ds_rad.latitudes.data, axis=0))\n", + "ds_rad[\"rainfall_amount\"] = (\n", + " (\"time\", \"y\", \"x\"),\n", + " np.flip(ds_rad.rainfall_amount.data, axis=1),\n", + ")\n", + "\n", + "# convert to sum 5 min\n", + "ds_rad[\"rainfall_amount\"] = ds_rad.rainfall_amount * (5 / 60)\n", + "\n", + "ds_rad.attrs[\"comment\"] += (\n", + " \"\\n dBZ was converted to rainfall [mm/h] using the marshal-palmer equation: \"\n", + " \"( 10 **(dBZ/10) / 200)**(5/8). \"\n", + " \" Done by Erlend Øydvin. \"\n", + ")\n", + "ds_rad.rainfall_amount.attrs[\"units\"] = \"sum 5min\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e3db5236-2d4e-42b0-a2fe-1d1c9956d8f2", + "metadata": {}, + "outputs": [], + "source": [ + "ds_rad = ds_rad.drop_vars(\"data\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "85342307-a74f-4000-9772-30750ce458e7", + "metadata": {}, + "outputs": [], + "source": [ + "ds_rad.to_netcdf(\"./openmrg_rad.nc\")" + ] + }, + { + "cell_type": "markdown", + "id": "6f431763-ebc4-41fc-8e02-b767a0446253", + "metadata": {}, + "source": [ + "# Municipality gauge data" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "b40510d5-8f26-474c-a12f-1ba9816c9a94", + "metadata": {}, + "outputs": [], + "source": [ + "# read gauge data from CSV and store to xarray, copied from Graf compare article\n", + "df_gauge = pd.read_csv(\n", + " local_path + \"gauges/city/CityGauges-2015JJA.csv\", index_col=0, parse_dates=True\n", + ")\n", + "df_gauge_meta = pd.read_csv(local_path + \"gauges/city/CityGauges-metadata.csv\")\n", + "\n", + "df_gauge.index = df_gauge.index.tz_localize(None).astype(\"datetime64[ns]\")\n", + "\n", + "ds_gauges = xr.Dataset(\n", + " data_vars={\"rainfall_amount\": ([\"station_id\", \"time\"], df_gauge.T)},\n", + " coords={\n", + " \"station_id\": df_gauge_meta.index.to_numpy(),\n", + " \"time\": df_gauge.index.to_numpy(),\n", + " \"lon\": ([\"station_id\"], df_gauge_meta.Longitude_DecDeg),\n", + " \"lat\": ([\"station_id\"], df_gauge_meta.Latitude_DecDeg),\n", + " \"location\": ([\"station_id\"], df_gauge_meta.Location),\n", + " \"type\": ([\"station_id\"], df_gauge_meta.Type),\n", + " \"quantization\": ([\"station_id\"], df_gauge_meta[\"Resolution (mm)\"]),\n", + " },\n", + ")\n", + "# shorten and resample to sum 5 min\n", + "ds_gauges = ds_gauges.sel(time=slice(start, end)).resample(time=\"5min\").sum()\n", + "ds_gauges.to_netcdf(\"./openmrg_municp_gauge.nc\")" + ] + }, + { + "cell_type": "markdown", + "id": "59a3a1e8-a295-4f37-bfc9-6b87c55670c6", + "metadata": {}, + "source": [ + "# SMHI gauge data" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "037ca7f5-11f2-4e29-b877-5221c30f3e00", + "metadata": {}, + "outputs": [], + "source": [ + "# Taken from the comparison paper\n", + "df_gauge_smhi = pd.read_csv(\n", + " local_path + \"gauges/smhi/GbgA-71420-2015JJA.csv\",\n", + " index_col=0,\n", + " parse_dates=True,\n", + ")\n", + "\n", + "# Convert to no timezone to make to_numpy work instead of .values (RUFF complains)\n", + "df_gauge_smhi.index = df_gauge_smhi.index.tz_localize(None).astype(\"datetime64[ns]\")\n", + "\n", + "\n", + "ds_gauges_smhi = xr.Dataset(\n", + " data_vars={\n", + " \"rainfall_amount\": ([\"station_id\", \"time\"], [df_gauge_smhi.Pvol_mm.to_numpy()]),\n", + " },\n", + " coords={\n", + " \"station_id\": [\"SMHI\"],\n", + " \"time\": df_gauge_smhi.index.to_numpy(),\n", + " \"lon\": ([\"station_id\"], [11.9924]),\n", + " \"lat\": ([\"station_id\"], [57.7156]),\n", + " \"location\": ([\"station_id\"], [\"Goeteburg A\"]),\n", + " \"type\": ([\"station_id\"], [\"15 min rainfall sum\"]),\n", + " \"quantization\": ([\"station_id\"], [0.1]),\n", + " },\n", + ")\n", + "\n", + "# Slice time\n", + "ds_gauges_smhi = ds_gauges_smhi.sel(time=slice(start, end))\n", + "\n", + "# from 15 min sum to 5 min sum\n", + "ds_gauges_smhi = ds_gauges_smhi.resample(time=\"5min\").bfill() / 3\n", + "\n", + "# Save\n", + "ds_gauges_smhi.to_netcdf(\"./openmrg_smhi_gauge.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1cba8c5-8cdd-4a0e-9f8a-17e79ca21436", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40dde408-4ab1-48c0-8292-5988633581cd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/OpenMRG/openmrg_cml_5min_2h.nc b/OpenMRG/openmrg_cml_5min_2h.nc new file mode 100644 index 0000000..c5edad7 Binary files /dev/null and b/OpenMRG/openmrg_cml_5min_2h.nc differ diff --git a/OpenMRG/openmrg_municp_gauge_5min_2h.nc b/OpenMRG/openmrg_municp_gauge_5min_2h.nc new file mode 100644 index 0000000..af3d737 Binary files /dev/null and b/OpenMRG/openmrg_municp_gauge_5min_2h.nc differ diff --git a/OpenMRG/openmrg_rad_5min_2h.nc b/OpenMRG/openmrg_rad_5min_2h.nc new file mode 100644 index 0000000..6f5d130 Binary files /dev/null and b/OpenMRG/openmrg_rad_5min_2h.nc differ diff --git a/OpenMRG/openmrg_smhi_gauge_5min_2h.nc b/OpenMRG/openmrg_smhi_gauge_5min_2h.nc new file mode 100644 index 0000000..2874018 Binary files /dev/null and b/OpenMRG/openmrg_smhi_gauge_5min_2h.nc differ