Skip to content

Commit

Permalink
Address recent pandas deprecation and future warnings (#43)
Browse files Browse the repository at this point in the history
* Replace DataFrame.append by concat

* Use public pandas.testing instead of pandas.util.testing

* Use up-to-date xlrd and switch to openpyxl

* utils: Use raw string in re.sub call

* Replace MultiIndex slicing with new isin helper

MultiIndex slicing with lists whose entries are not all guaranteed to be
present in the index is deprecated; in future pandas versions it will raise
a KeyError if even a single entry is missing.
  • Loading branch information
coroa authored Feb 26, 2022
1 parent 6af4d5d commit 867a173
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 38 deletions.
2 changes: 1 addition & 1 deletion aneris/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def pd_write(df, f, *args, **kwargs):
if f.endswith('csv'):
df.to_csv(f, index=index, *args, **kwargs)
else:
writer = pd.ExcelWriter(f, engine='xlsxwriter')
writer = pd.ExcelWriter(f)
df.to_excel(writer, index=index, *args, **kwargs)
writer.save()

Expand Down
30 changes: 8 additions & 22 deletions aneris/harmonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from aneris import utils
from aneris import pd_read
from aneris.utils import isin
from aneris.methods import harmonize_factors, constant_offset, reduce_offset, \
constant_ratio, reduce_ratio, linear_interpolate, model_zero, hist_zero, \
budget, coeff_of_var, default_methods
Expand Down Expand Up @@ -72,7 +73,8 @@ def __init__(
self.base_year = str(config[key]) if key in config else '2015'
self.data = data[utils.numcols(data)]
self.model = pd.Series(index=self.data.index,
name=self.base_year).to_frame()
name=self.base_year,
dtype=float).to_frame()
self.history = history
self.methods_used = None
self.offsets, self.ratios = harmonize_factors(
Expand Down Expand Up @@ -514,34 +516,18 @@ def _get_global_overrides(overrides, gases, sector):
# None if no overlap with gases
if overrides is None:
return None
gases = overrides.index.get_level_values('gas').intersection(gases)
gases = list(set(gases)) # single instance for each gas
if len(gases) == 0:
return None

# This tried to be fancy with multi index slicing at one point, but caused
# too much trouble. Now it is just done brute force.

# Downselect overrides that match global gas values
o = overrides
idx = o.index.names
o = o.reset_index()
o = o[o.region == 'World']
o = o[o.sector == sector]
o = o[o.gas.isin(gases)]
if o.empty:
return None
else:
return o.set_index(idx)['method']
o = overrides.loc[isin(region="World", sector=sector, gas=gases)]
return o if not o.empty else None


def _harmonize_global_total(config, prefix, suffix, hist, model, overrides,
default_global_gases=True):
all_gases = list(model.index.get_level_values('gas').unique())
gases = utils.harmonize_total_gases if default_global_gases else all_gases
sector = '|'.join([prefix, suffix])
idx = (pd.IndexSlice['World', gases, sector],
pd.IndexSlice[:])
idx = isin(region="World", gas=gases, sector=sector)
h = hist.loc[idx].copy()

try:
Expand Down Expand Up @@ -614,7 +600,7 @@ def _harmonize_regions(config, prefix, suffix, regions, hist, model, overrides,
_warn(msg)
model = model[~idx]
totals = '|'.join([prefix, suffix])
sector_total_idx = model.index.get_level_values('sector').isin([totals])
sector_total_idx = isin(model, sector=totals)
subsector_idx = ~sector_total_idx
# step 2: on the "clean" df, recalculate those totals
subsectors_with_total_df = (
Expand Down Expand Up @@ -642,7 +628,7 @@ def _harmonize_regions(config, prefix, suffix, regions, hist, model, overrides,
mapping = regions[regions['Native Region Code'] != 'World'].copy()
aggdf = utils.agg_regions(model, mapping=mapping,
rfrom='Native Region Code', rto='5_region')
model = model.append(aggdf)
model = pd.concat([model, aggdf])
assert(not model.isnull().values.any())

# duplicates come in from World and World being translated
Expand Down
4 changes: 2 additions & 2 deletions aneris/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ def budget(df, df_hist, harmonize_year='2015'):

harmonize_year = int(harmonize_year)

df = df.set_axis(df.columns.astype(int), 'columns', inplace=False)
df_hist = df_hist.set_axis(df_hist.columns.astype(int), 'columns', inplace=False)
df = df.set_axis(df.columns.astype(int), axis='columns')
df_hist = df_hist.set_axis(df_hist.columns.astype(int), axis='columns')

data_years = df.columns
hist_years = df_hist.columns
Expand Down
28 changes: 23 additions & 5 deletions aneris/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import os
import re
from functools import reduce
from operator import and_

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -168,7 +170,7 @@ def remove_emissions_prefix(x, gas='XXX'):
"""Return x with emissions prefix removed, e.g.,
Emissions|XXX|foo|bar -> foo|bar
"""
return re.sub('^Emissions\|{}\|'.format(gas), '', x)
return re.sub(r'^Emissions\|{}\|'.format(gas), '', x)


def recalculated_row_idx(df, prefix='', suffix=''):
Expand Down Expand Up @@ -439,14 +441,14 @@ def _add_totals(self, totals):
grp_idx = [x for x in df_idx if x != 'sector']
rows = self.df.groupby(grp_idx).sum().reset_index()
rows['sector'] = totals
self.df = self.df.append(rows)
self.df = pd.concat([self.df, rows])

def _add_aggregates(self):
mapping = pd_read(iamc_path('sector_mapping.xlsx'),
sheet_name='Aggregates')
mapping = mapping.applymap(remove_emissions_prefix)

rows = pd.DataFrame(columns=self.df.columns)
rows = []
for sector in mapping['IAMC Parent'].unique():
# mapping for aggregate sector for all gases
_map = mapping[mapping['IAMC Parent'] == sector]
Expand All @@ -458,9 +460,9 @@ def _add_aggregates(self):

# add aggregate to rows
subset = subset.groupby(df_idx).sum().reset_index()
rows = rows.append(subset)
rows.append(subset)

self.df = self.df.append(rows)
self.df = pd.concat([self.df] + rows)


class FormatTranslator(object):
Expand Down Expand Up @@ -625,3 +627,19 @@ def _convert_units(self, df, tostd=True):
assert((df.units == 'kt').all())
df.loc[where, numcols(df)] /= 1e3
df.loc[where, 'units'] = 'Mt'


def isin(df=None, **filters):
    """Construct a selector (or boolean mask) on named index levels.

    Usage
    -----
    > df.loc[isin(region="World", gas=["CO2", "N2O"])]
    or with explicit df to get boolean mask
    > isin(df, region="World", gas=["CO2", "N2O"])

    Parameters
    ----------
    df : object with an ``index`` attribute, optional
        If given, the mask is computed immediately against ``df.index``.
        If omitted, a callable is returned that can be handed to ``.loc``.
    **filters
        ``level_name=value`` or ``level_name=[values, ...]`` pairs. A row is
        selected when, for every given level, its label is one of the listed
        values. Listed values that do not occur in the index are ignored.
        With no filters at all, every row is selected.

    Returns
    -------
    numpy.ndarray of bool, or callable
        The boolean mask if ``df`` was given, otherwise a function
        ``df -> mask``.

    Raises
    ------
    KeyError
        Raised by pandas if a filter keyword does not name an index level.
    """
    def tester(df):
        # Start from "select everything" so that an empty filter set is
        # well defined instead of failing on an exhausted iterator.
        mask = np.ones(len(df.index), dtype=bool)
        for level, values in filters.items():
            # atleast_1d lets callers pass a scalar as well as a list
            mask &= df.index.isin(np.atleast_1d(values), level=level)
        return mask

    return tester if df is None else tester(df)
2 changes: 1 addition & 1 deletion ci/environment-conda-default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ dependencies:
- pyyaml
- xlrd >=2.0
- openpyxl
- xlsxwriter
- matplotlib
- seaborn
- pyomo
- pytest
- jupyter
- nbconvert
Expand Down
6 changes: 3 additions & 3 deletions doc/environment.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
dependencies:
- numpy
- "pandas>=1.1"
- pandas >=1.1
- pyyaml
- xlrd
- xlsxwriter
- xlrd >=2.0
- openpyxl
- matplotlib
- seaborn>=0.8
- pyomo>=5
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
'PyYAML',
'xlrd>=2.0',
'openpyxl',
'xlsxwriter',
'matplotlib',
'pyomo>=5'
]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_default_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from aneris import harmonize

import pandas.util.testing as pdt
import pandas.testing as pdt


def make_index(length, gas='CH4', sector='Energy'):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal
from os.path import join

from aneris import cli
Expand Down
26 changes: 25 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import pandas as pd

import pandas.util.testing as pdt
import pandas.testing as pdt

from aneris import utils

Expand Down Expand Up @@ -189,3 +190,26 @@ def test_combine_rows_sumall():
exp = exp.reindex(columns=obs.columns)
clean = lambda df: df.sort_index().reset_index()
pdt.assert_frame_equal(clean(obs), clean(exp))


def test_isin():
    """Check ``utils.isin`` both as a ``.loc`` selector and as a direct mask."""
    frame = combine_rows_df()
    columns = {
        'sector': ['sector1', 'sector2', 'sector1'],
        'region': ['a', 'a', 'b'],
        '2010': [1.0, 4.0, 2.0],
        'foo': [-1.0, -4.0, 2.0],
        'units': ['Mt'] * 3,
        'gas': ['BC'] * 3,
    }
    exp = pd.DataFrame(columns).set_index(utils.df_idx)

    # A requested value that is absent from the index ("non-existent") must
    # not prevent the matching rows from being selected.
    selector = utils.isin(
        sector=["sector1", "sector2"],
        region=["a", "b", "non-existent"],
    )
    obs = exp.loc[selector]
    pdt.assert_frame_equal(obs, exp)

    # Filtering on a level name the index does not have is an error.
    with pytest.raises(KeyError):
        utils.isin(frame, region="World", non_existing_level="foo")

0 comments on commit 867a173

Please sign in to comment.