Skip to content

Commit

Permalink
Add units agnostic harmonisation (#42)
Browse files Browse the repository at this point in the history
* Add failing test of units handling via standard interfaces

* Add test of convenience handling method

* Pass tests of single method

* Fix tests

* Add first test of multiple timeseries handling

* Pass first multi timeseries test

* Pass tests of handling multiple timeseries

* Add tests of error handling

* Avoid warning

* Format

* Satisfy stickler

* Update CI dependencies

* Typo

* Fix dependencies again

* Test multiple matching overrides

* Test default decision tree propagation

* Finalise last test

* Add example notebook

* Docstring

* Format

* Appease stickler
  • Loading branch information
znicholls authored Mar 17, 2022
1 parent 867a173 commit 6d81bb1
Show file tree
Hide file tree
Showing 10 changed files with 4,333 additions and 30 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci-cd-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
conda env update --file ci/environment-conda-default.yml
conda env update --file ci/environment-conda-forge.yml
conda env update --file doc/environment.yml
pip install -e .[tests,deploy]
pip install -e .[tests,deploy,units]
# if we want to remove stickler
# - name: Run format and linting tests
# shell: bash -l {0}
Expand Down Expand Up @@ -103,7 +103,7 @@ jobs:
conda env update --file ci/environment-conda-default.yml
conda env update --file ci/environment-conda-forge.yml
conda env update --file doc/environment.yml
pip install -e .[tests,deploy]
pip install -e .[tests,deploy,units]
- name: Install ipopt (${{ runner.os }})
# see https://github.com/conda-forge/ipopt-feedstock/issues/55
if: startsWith(runner.os, 'Windows')
Expand Down Expand Up @@ -161,7 +161,7 @@ jobs:
conda env update --file ci/environment-conda-forge.yml
conda env update --file doc/environment.yml
conda install -q pandas==${{ matrix.pandas-version }}
pip install .[tests]
pip install .[tests,units]
- name: Run tests
shell: bash -l {0}
run: |
Expand Down Expand Up @@ -206,7 +206,7 @@ jobs:
conda env update --file ci/environment-conda-default.yml
conda env update --file ci/environment-conda-forge.yml
conda env update --file doc/environment.yml
pip install -e .[tests,deploy]
pip install -e .[tests,deploy,units]
- name: Download data
shell: bash -l {0}
env:
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ docs: $(VENV_DIR) ## make the docs
.PHONY: virtual-environment
virtual-environment: $(VENV_DIR) ## make virtual environment for development

$(VENV_DIR): $(CI_ENVIRONMENT_CONDA_DEFAULT_FILE) $(CI_ENVIRONMENT_CONDA_FORGE_FILE) $(ENVIRONMENT_DOC_FILE)
$(VENV_DIR): setup.py $(CI_ENVIRONMENT_CONDA_DEFAULT_FILE) $(CI_ENVIRONMENT_CONDA_FORGE_FILE) $(ENVIRONMENT_DOC_FILE)
$(CONDA_EXE) config --add channels conda-forge # sets conda-forge as highest priority
# install requirements
$(CONDA_EXE) env update --name $(CONDA_DEFAULT_ENV) --file $(CI_ENVIRONMENT_CONDA_DEFAULT_FILE)
$(CONDA_EXE) env update --name $(CONDA_DEFAULT_ENV) --file $(CI_ENVIRONMENT_CONDA_FORGE_FILE)
$(CONDA_EXE) env update --name $(CONDA_DEFAULT_ENV) --file $(ENVIRONMENT_DOC_FILE)
# Install development setup
$(VENV_DIR)/bin/pip install -e .[tests,deploy]
$(VENV_DIR)/bin/pip install -e .[tests,deploy,units]
touch $(VENV_DIR)

.PHONY: release-on-conda
Expand Down
175 changes: 175 additions & 0 deletions aneris/convenience.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from openscm_units import unit_registry

from .harmonize import Harmonizer, default_methods
from .errors import (
AmbiguousHarmonisationMethod,
MissingHarmonisationYear,
MissingHistoricalError,
)
from .methods import harmonize_factors


def harmonise_all(scenarios, history, harmonisation_year, overrides=None):
    """
    Harmonise all timeseries in ``scenarios`` to match ``history``

    Each timeseries (row) in ``scenarios`` is harmonised independently. The
    historical timeseries it is harmonised to is the one whose "variable" and
    "region" match; the historical data is converted to the units of the
    scenario timeseries before harmonising.

    Parameters
    ----------
    scenarios : :obj:`pd.DataFrame`
        :obj:`pd.DataFrame` containing the timeseries to be harmonised

    history : :obj:`pd.DataFrame`
        :obj:`pd.DataFrame` containing the historical timeseries to which
        ``scenarios`` should be harmonised

    harmonisation_year : int
        The year in which ``scenarios`` should be harmonised to ``history``

    overrides : :obj:`pd.DataFrame`
        If not provided, the default aneris decision tree is used. Otherwise,
        ``overrides`` must be a :obj:`pd.DataFrame` containing any
        specifications for overriding the default aneris methods. Each row
        specifies one override. The override method is specified in the
        "method" column. The other columns specify which of the timeseries in
        ``scenarios`` should use this override by specifying metadata to match
        (e.g. variable, region). If a cell has a null value (evaluated using
        `pd.isnull()`) then that scenario characteristic will not be used for
        filtering for that override e.g. if you have a row with "method" equal
        to "constant_ratio", region equal to "World" and variable is null then
        all timeseries in the World region will use the "constant_ratio"
        method. In contrast, if you have a row with "method" equal to
        "constant_ratio", region equal to "World" and variable is
        "Emissions|CO2" then only timeseries with variable equal to
        "Emissions|CO2" and region equal to "World" will use the
        "constant_ratio" method. Timeseries which no override matches fall
        back to the default aneris decision tree.

    Returns
    -------
    :obj:`pd.DataFrame`
        The harmonised timeseries

    Notes
    -----
    This interface is nowhere near as sophisticated as aneris' other
    interfaces. It simply harmonises timeseries, it does not check sectoral
    sums or other possible errors which can arise when harmonising. If you need
    such features, do not use this interface.

    Raises
    ------
    MissingHistoricalError
        No historical data is provided for a given timeseries

    MissingHarmonisationYear
        A value for the harmonisation year is missing or is null in ``history``

    AmbiguousHarmonisationMethod
        ``overrides`` do not uniquely specify the harmonisation method for a
        given timeseries
    """
    # use groupby to maintain indexes, not sure if there's a better way because
    # this will likely be super slow
    #
    # group_keys=False is passed explicitly: each group's result carries the
    # same index as its input, and without it newer pandas versions prepend the
    # group keys to the result, duplicating every index level
    return scenarios.groupby(scenarios.index.names, group_keys=False).apply(
        _harmonise_single, history, harmonisation_year, overrides
    )


def _harmonise_single(timeseries, history, harmonisation_year, overrides):
    """
    Harmonise a single timeseries to its matching historical timeseries

    Parameters
    ----------
    timeseries : :obj:`pd.DataFrame`
        Exactly one row, whose index levels include "variable", "region" and
        "unit"

    history : :obj:`pd.DataFrame`
        Historical timeseries, whose index levels include "variable", "region"
        and "unit"

    harmonisation_year : int
        Column of ``history`` in which the harmonisation takes place

    overrides : :obj:`pd.DataFrame` or None
        Method overrides (see :func:`harmonise_all`). If ``None``, or if no
        row matches ``timeseries``, the method is taken from
        ``default_methods``.

    Returns
    -------
    :obj:`pd.DataFrame`
        The harmonised timeseries

    Raises
    ------
    MissingHistoricalError
        ``history`` has no row matching the region and variable of
        ``timeseries``

    MissingHarmonisationYear
        ``harmonisation_year`` is not a column of the matching history or all
        of its values in that column are null

    AmbiguousHarmonisationMethod
        More than one row of ``overrides`` matches ``timeseries``
    """
    assert timeseries.shape[0] == 1, "expected exactly one timeseries"
    # unclear why we don't use pyam or scmdata for filtering
    mdata = dict(zip(timeseries.index.names, timeseries.index.to_list()[0]))

    variable = mdata["variable"]
    region = mdata["region"]

    hist_variable = history.index.get_level_values("variable") == variable
    hist_region = history.index.get_level_values("region") == region
    relevant_hist = history[hist_variable & hist_region]

    if relevant_hist.empty:
        error_msg = "No historical data for `{}` `{}`".format(region, variable)
        raise MissingHistoricalError(error_msg)

    # `in` on a DataFrame tests membership of its columns
    if harmonisation_year not in relevant_hist:
        error_msg = "No historical data for year {} for `{}` `{}`".format(
            harmonisation_year, region, variable
        )
        raise MissingHarmonisationYear(error_msg)

    if relevant_hist[harmonisation_year].isnull().all():
        error_msg = "Historical data is null for year {} for `{}` `{}`".format(
            harmonisation_year, region, variable
        )
        raise MissingHarmonisationYear(error_msg)

    # convert units
    hist_unit = relevant_hist.index.get_level_values("unit").unique()[0]
    relevant_hist = _convert_units(
        relevant_hist, current_unit=hist_unit, target_unit=mdata["unit"]
    )
    # set index for rest of processing (as units are now consistent)
    relevant_hist.index = timeseries.index.copy()

    method = None
    if overrides is not None:
        # narrow the overrides down to the rows matching this timeseries; a
        # null cell acts as a wildcard for that piece of metadata. Boolean
        # indexing returns new objects so ``overrides`` is never modified.
        method = overrides
        for key, value in mdata.items():
            if key in method:
                method = method[(method[key] == value) | method[key].isnull()]

        if method.shape[0] > 1:
            raise AmbiguousHarmonisationMethod(
                "More than one override for metadata: {}".format(mdata)
            )

    if method is None or method.empty:
        # no applicable override, fall back to the default decision tree
        default, _ = default_methods(
            relevant_hist, timeseries, base_year=harmonisation_year
        )
        method_to_use = default.values[0]
    else:
        method_to_use = method["method"].values[0]

    return _harmonise_aligned(
        timeseries, relevant_hist, harmonisation_year, method_to_use
    )


def _convert_units(inp, current_unit, target_unit):
    """
    Return a copy of ``inp`` expressed in ``target_unit``

    All values are converted from ``current_unit`` to ``target_unit`` and the
    "unit" index level is relabelled as ``target_unit``. ``inp`` itself is not
    modified.
    """
    # would be simpler using scmdata or pyam
    converted = inp.copy()
    quantity = converted.values * unit_registry(current_unit)
    converted.iloc[:, :] = quantity.to(target_unit).magnitude

    # relabel the unit: pull the level out into a column, overwrite it and
    # append it back onto the index
    relabelled = converted.reset_index("unit")
    relabelled["unit"] = target_unit

    return relabelled.set_index("unit", append=True)


def _harmonise_aligned(timeseries, history, harmonisation_year, method):
    """
    Apply the harmonisation method named ``method`` to ``timeseries``

    The method is looked up by name in ``Harmonizer._methods`` and called with
    ``timeseries``, the value returned by ``_get_delta`` for this method and
    ``harmonisation_year`` as the ``harmonize_year`` keyword argument.
    """
    # seems odd that the methods are stored in a class instance
    apply_method = Harmonizer._methods[method]
    adjustment = _get_delta(timeseries, history, method, harmonisation_year)
    harmonised = apply_method(
        timeseries, adjustment, harmonize_year=harmonisation_year
    )

    return harmonised


def _get_delta(timeseries, history, method, harmonisation_year):
    """
    Return the second argument to pass to the harmonisation method ``method``

    For the "budget" method this is ``history`` itself. For any other method
    it is one of the two values returned by ``harmonize_factors``: the ratio
    if the method name contains "ratio", otherwise the offset.
    """
    if method == "budget":
        return history

    offsets, ratios = harmonize_factors(timeseries, history, harmonisation_year)

    return ratios if "ratio" in method else offsets
16 changes: 16 additions & 0 deletions aneris/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class AmbiguousHarmonisationMethod(ValueError):
    """
    Raised when the harmonisation method for a timeseries is ambiguous

    For example, more than one override matches the same timeseries.
    """


class MissingHistoricalError(ValueError):
    """
    Raised when no historical data is available for a timeseries
    """


class MissingHarmonisationYear(ValueError):
    """
    Raised when historical data has no usable value in the harmonisation year

    This covers both the year being absent and its values being null.
    """
25 changes: 17 additions & 8 deletions aneris/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,10 @@ def reduce_offset(df, offset, final_year='2050', harmonize_year='2015'):
df = df.copy()
yi, yf = int(harmonize_year), int(final_year)
numcols = utils.numcols(df)
numcols_int = [int(v) for v in numcols]
# get factors that reduce from 1 to 0; factors before base year are > 1
f = lambda year: -(year - yi) / float(yf - yi) + 1
factors = [f(int(year)) if year <= final_year else 0.0 for year in numcols]
factors = [f(year) if year <= yf else 0.0 for year in numcols_int]
# add existing values to offset time series
offsets = pd.DataFrame(np.outer(offset, factors),
columns=numcols, index=offset.index)
Expand Down Expand Up @@ -171,17 +172,19 @@ def reduce_ratio(df, ratios, final_year='2050', harmonize_year='2015'):
df = df.copy()
yi, yf = int(harmonize_year), int(final_year)
numcols = utils.numcols(df)
numcols_int = [int(v) for v in numcols]
# get factors that reduce from 1 to 0, but replace with 1s in years prior
# to harmonization
f = lambda year: -(year - yi) / float(yf - yi) + 1
prefactors = [f(int(harmonize_year))
for year in numcols if year < harmonize_year]
postfactors = [f(int(year)) if year <= final_year else 0.0
for year in numcols if year >= harmonize_year]
prefactors = [f(yi)
for year in numcols_int if year < yi]
postfactors = [f(year) if year <= yf else 0.0
for year in numcols_int if year >= yi]
factors = prefactors + postfactors
# multiply existing values by ratio time series
ratios = pd.DataFrame(np.outer(ratios - 1, factors),
columns=numcols, index=ratios.index) + 1

df[numcols] = df[numcols] * ratios
return df

Expand Down Expand Up @@ -402,7 +405,9 @@ def default_method_choice(
return 'constant_offset'
else:
# is this co2?
if row.gas == 'CO2':
# ZN: This gas dependence isn't documented in the default
# decision tree
if hasattr(row, "gas") and row.gas == 'CO2':
return ratio_method
# is cov big?
if np.isfinite(row['cov']) and row['cov'] > luc_cov_threshold:
Expand Down Expand Up @@ -469,8 +474,12 @@ def default_methods(hist, model, base_year, method_choice=None, **kwargs):
kwargs['luc_cov_threshold'] = 10

y = str(base_year)
h = hist[y]
m = model[y]
try:
h = hist[base_year]
m = model[base_year]
except KeyError:
h = hist[y]
m = model[y]
dH = (h - m).abs() / h
f = h / m
dM = (model.max(axis=1) - model.min(axis=1)).abs() / model.max(axis=1)
Expand Down
Loading

0 comments on commit 6d81bb1

Please sign in to comment.