Skip to content

Commit

Permalink
Add black (#50)
Browse files Browse the repository at this point in the history
* Add black to the test dependency

* Run black

* Ignore docs directory

* Fix stickler errors

* Fix stickler errors

* Rerun black

* Move pd_read/pd_write to utils to stop a circular dependency

* Needed to import pd_read from utils
  • Loading branch information
lewisjared authored Aug 4, 2022
1 parent c009308 commit ad6301e
Show file tree
Hide file tree
Showing 20 changed files with 1,036 additions and 916 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ build
dist
*.egg-info
.cache
.*
.*

venv
5 changes: 4 additions & 1 deletion .stickler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ linters:
python: 3
max-line-length: 88
fixer: false
ignore: I002, F403, E402, E731, E203
ignore: I002, F403, E402, E731, E203, W503
# stickler doesn't support 'exclude' for flake8 properly, so we disable it
# below with files.ignore:
# https://github.com/markstory/lint-review/issues/184
black:
config: ./pyproject.toml
fixer: false
files:
ignore:
- doc/**/*.py
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ publish-on-pypi: $(VENV_DIR) ## publish release on PyPI
echo run git status --porcelain to find dirty files >&2; \
fi;

.PHONY: black
black: $(VENV_DIR)
black .

.PHONY: ci_dl
ci_dl: $(VENV_DIR) ## run all the tests
cd tests/ci; python download_data.py
Expand Down
4 changes: 2 additions & 2 deletions aneris/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

from aneris._io import *
from aneris.harmonize import *
from aneris.utils import *

from ._version import get_versions
__version__ = get_versions()['version']

__version__ = get_versions()["version"]
del get_versions
74 changes: 19 additions & 55 deletions aneris/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import pandas as pd

from aneris.utils import isstr, isnum, iamc_idx
from aneris.utils import isstr, isnum, iamc_idx, pd_read

RC_DEFAULTS = """
config:
Expand All @@ -26,7 +26,7 @@


def _read_data(indfs):
datakeys = sorted([x for x in indfs if x.startswith('data')])
datakeys = sorted([x for x in indfs if x.startswith("data")])
df = pd.concat([indfs[k] for k in datakeys])
# don't know why reading from excel changes dtype and column types
# but I have to reset them manually
Expand All @@ -50,46 +50,6 @@ def _recursive_update(d, u):
return d


def pd_read(f, str_cols=False, *args, **kwargs):
    """Try to read a file with pandas, supports CSV and XLSX

    Parameters
    ----------
    f : string or path-like
        the file to read in; names ending in ``csv`` (any letter case) are
        read with ``pd.read_csv``, everything else with ``pd.read_excel``
    str_cols : bool, optional
        turn all columns into strings (numerical column names are sometimes
        read in as numerical dtypes)
    args, kwargs : sent directly to the Pandas read function

    Returns
    -------
    df : pd.DataFrame
    """
    # str() lets pathlib.Path objects through (they have no .endswith) and
    # lower() keeps "DATA.CSV" from being handed to the Excel reader
    if str(f).lower().endswith("csv"):
        df = pd.read_csv(f, *args, **kwargs)
    else:
        df = pd.read_excel(f, *args, **kwargs)

    if str_cols:
        df.columns = [str(x) for x in df.columns]

    return df


def pd_write(df, f, *args, **kwargs):
    """Try to write a file with pandas, supports CSV and XLSX

    Parameters
    ----------
    df : pd.DataFrame
        the data to write
    f : string or path-like
        the file to write to; names ending in ``csv`` (any letter case) are
        written with ``DataFrame.to_csv``, everything else with
        ``DataFrame.to_excel``
    args, kwargs : sent directly to the Pandas write function; an explicit
        ``index`` keyword overrides the guess described below
    """
    # guess whether to use index, unless we're told otherwise
    index = kwargs.pop("index", isinstance(df.index, pd.MultiIndex))

    # str() lets pathlib.Path objects through (they have no .endswith)
    if str(f).lower().endswith("csv"):
        df.to_csv(f, *args, index=index, **kwargs)
    else:
        # the context manager saves and closes the workbook even if
        # to_excel raises; ExcelWriter.save() was deprecated in pandas 1.5
        # and removed in pandas 2.0
        with pd.ExcelWriter(f) as writer:
            df.to_excel(writer, *args, index=index, **kwargs)


def read_excel(f):
"""Read an excel-based input file for harmonization.
Expand All @@ -111,20 +71,23 @@ def read_excel(f):
model = _read_data(indfs)

# make an empty df which will be caught later
overrides = indfs['harmonization'] if 'harmonization' in indfs \
else pd.DataFrame([], columns=iamc_idx + ['Unit'])
overrides = (
indfs["harmonization"]
if "harmonization" in indfs
else pd.DataFrame([], columns=iamc_idx + ["Unit"])
)

# get run control
config = {}
if'Configuration' in overrides:
config = overrides[['Configuration', 'Value']].dropna()
config = config.set_index('Configuration').to_dict()['Value']
overrides = overrides.drop(['Configuration', 'Value'], axis=1)
if "Configuration" in overrides:
config = overrides[["Configuration", "Value"]].dropna()
config = config.set_index("Configuration").to_dict()["Value"]
overrides = overrides.drop(["Configuration", "Value"], axis=1)

# a single row of nans implies only configs provided,
# if so, only return the empty df
if len(overrides) == 1 and overrides.isnull().values.all():
overrides = pd.DataFrame([], columns=iamc_idx + ['Unit'])
overrides = pd.DataFrame([], columns=iamc_idx + ["Unit"])

return model, overrides, config

Expand All @@ -140,10 +103,10 @@ def __init__(self, rc=None, defaults=None):
Parameters
----------
rc : string, file, dictionary, optional
a path to a YAML file, a file handle for a YAML file, or a
a path to a YAML file, a file handle for a YAML file, or a
dictionary describing run control configuration
defaults : string, file, dictionary, optional
a path to a YAML file, a file handle for a YAML file, or a
a path to a YAML file, a file handle for a YAML file, or a
dictionary describing **default** run control configuration
"""
rc = rc or {}
Expand Down Expand Up @@ -171,22 +134,23 @@ def _get_path(self, key, fyaml, fname):

_fname = os.path.join(os.path.dirname(fyaml), fname)
if not os.path.exists(_fname):
msg = "YAML key '{}' in {}: {} is not a valid relative " + \
"or absolute path"
msg = (
"YAML key '{}' in {}: {} is not a valid relative " + "or absolute path"
)
raise IOError(msg.format(key, fyaml, fname))
return _fname

def _fill_relative_paths(self, fyaml, d):
file_keys = [
'exogenous',
"exogenous",
]
for k in file_keys:
if k in d:
d[k] = [self._get_path(k, fyaml, fname) for fname in d[k]]

def _load_yaml(self, obj):
check_rel_paths = False
if hasattr(obj, 'read'): # it's a file
if hasattr(obj, "read"): # it's a file
obj = obj.read()
if isstr(obj) and os.path.exists(obj):
check_rel_paths = True
Expand Down
83 changes: 47 additions & 36 deletions aneris/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,54 @@ def read_args():
aneris input.xlsx --history history.csv --regions regions.csv
"""
parser = argparse.ArgumentParser(
description=descr,
formatter_class=argparse.RawDescriptionHelpFormatter
description=descr, formatter_class=argparse.RawDescriptionHelpFormatter
)
input_file = 'Input data file.'
parser.add_argument('input_file', help=input_file)
history = 'Historical emissions in the base year.'
parser.add_argument('--history', help=history,
default=hist_path('history.csv'))
regions = 'Mapping of country iso-codes to native regions.'
parser.add_argument('--regions', help=regions,
default=region_path('message.csv'))
rc = 'Runcontrol YAML file (see http://mattgidden.com/aneris/config.html for examples).'
parser.add_argument('--rc', help=rc, default=None)
output_path = 'Path to use for output file names.'
parser.add_argument('--output_path', help=output_path, default='.')
output_prefix = 'Prefix to use for output file names.'
parser.add_argument('--output_prefix', help=output_prefix, default=None)
input_file = "Input data file."
parser.add_argument("input_file", help=input_file)
history = "Historical emissions in the base year."
parser.add_argument("--history", help=history, default=hist_path("history.csv"))
regions = "Mapping of country iso-codes to native regions."
parser.add_argument("--regions", help=regions, default=region_path("message.csv"))
rc = (
"Runcontrol YAML file "
"(see http://mattgidden.com/aneris/config.html for examples)."
)
parser.add_argument("--rc", help=rc, default=None)
output_path = "Path to use for output file names."
parser.add_argument("--output_path", help=output_path, default=".")
output_prefix = "Prefix to use for output file names."
parser.add_argument("--output_prefix", help=output_prefix, default=None)

args = parser.parse_args()
return args


def harmonize(inf, history, regions, rc, output_path, output_prefix,
return_result=False, write_output=True):
def harmonize(
inf,
history,
regions,
rc,
output_path,
output_prefix,
return_result=False,
write_output=True,
):
# check files exist
check = [inf, history, regions, rc]
for f in check:
if f and not os.path.exists(f):
raise IOError('{} does not exist on the filesystem.'.format(f))
raise IOError("{} does not exist on the filesystem.".format(f))

# read input
hist = aneris.pd_read(history, str_cols=True)
if hist.empty:
raise ValueError('History file is empty')
raise ValueError("History file is empty")
regions = aneris.pd_read(regions, str_cols=True)
if regions.empty:
raise ValueError('Region definition is empty')
raise ValueError("Region definition is empty")
model, overrides, config = aneris.read_excel(inf)
rc = aneris.RunControl(rc=rc)
rc.recursive_update('config', config)
rc.recursive_update("config", config)

# do core harmonization
driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions)
Expand All @@ -67,37 +75,40 @@ def harmonize(inf, history, regions, rc, output_path, output_prefix,

if write_output:
# write to excel
prefix = output_prefix or inf.split('.')[0]
fname = os.path.join(output_path, '{}_harmonized.xlsx'.format(prefix))
logger().info('Writing result to: {}'.format(fname))
aneris.pd_write(model, fname, sheet_name='data')
prefix = output_prefix or inf.split(".")[0]
fname = os.path.join(output_path, "{}_harmonized.xlsx".format(prefix))
logger().info("Writing result to: {}".format(fname))
aneris.pd_write(model, fname, sheet_name="data")

# save data about harmonization
fname = os.path.join(output_path, '{}_metadata.xlsx'.format(prefix))
logger().info('Writing metadata to: {}'.format(fname))
fname = os.path.join(output_path, "{}_metadata.xlsx".format(prefix))
logger().info("Writing metadata to: {}".format(fname))
aneris.pd_write(metadata, fname)

# save diagnostics, if any were produced
if not diagnostics.empty:
fname = os.path.join(output_path,
'{}_diagnostics.xlsx'.format(prefix))
logger().info('Writing diagnostics to: {}'.format(fname))
fname = os.path.join(output_path, "{}_diagnostics.xlsx".format(prefix))
logger().info("Writing diagnostics to: {}".format(fname))
aneris.pd_write(diagnostics, fname)

if return_result:
return model, metadata, diagnostics




def main():
# parse cli
args = read_args()

# run program
harmonize(args.input_file, args.history, args.regions,
args.rc, args.output_path, args.output_prefix)
harmonize(
args.input_file,
args.history,
args.regions,
args.rc,
args.output_path,
args.output_prefix,
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
Loading

0 comments on commit ad6301e

Please sign in to comment.