run-args

#!/usr/bin/env python3

"""
  Loads the args.csv file and runs parallel simulations for each argument set in it. Outputs a results.csv file at the end.
"""

import os
from os.path import realpath, dirname, join, isfile, abspath
import sys
import shutil
import subprocess
import csv
import uuid
import base64
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
from multicell_rc_utils import multicell_rc_params
from multiprocessing import Pool
import argparse
sns.set(font_scale=1.1)

# Command-line interface: three integer options, each with a default that is shown in --help.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
for short_flag, long_flag, default_value, help_text in (
  ('-i', '--iterations', 5, "Number of repeated simulations per set of arguments."),
  ('-p', '--processes', 2, "Number of parallel processes to use for running simulations."),
  ('-t', '--threads', 2, "Number of threads per process."),
):
  parser.add_argument(short_flag, long_flag, type=int, default=default_value, help=help_text)
cli_args = parser.parse_args()

# All paths used by this script are resolved relative to the directory containing it.
root_dir = abspath(dirname(__file__))
executable = join(root_dir, "run")

# Build the Biocellion model if it hasn't been built.
# make is invoked directly with cwd= instead of through a "cd ...; make" shell string, so a
# root_dir containing spaces or shell metacharacters still works. check=True raises
# CalledProcessError on a failed build instead of carrying on without the model library.
if not isfile(join(root_dir, "model/libmodel.DP.SPAGENT.so")):
  subprocess.run([ "make" ], cwd=join(root_dir, "model"), check=True)

# Remove the results dir of any previous runs and start from an empty one.
results_dir = join(root_dir, "results")
shutil.rmtree(results_dir, ignore_errors=True)
os.mkdir(results_dir)

def get_random_suffix():
  """Return a random ASCII token derived from a fresh UUID4.

  The UUID bytes are URL-safe base64 encoded, the '=' padding is stripped and every '-' is
  removed, so the result never contains '-' or '='.
  """
  encoded = base64.urlsafe_b64encode(uuid.uuid4().bytes)
  unpadded = encoded.rstrip(b'=')
  return unpadded.decode('ascii').replace('-', '')

# This function is run within a single process and performs all the simulation iterations for an argument set
def do_run(arg_set):
  """Run every iteration of one argument set and log the output into the results dir.

  Args:
    arg_set: list of command-line argument strings passed to the simulation executable.

  The executable's stdout and stderr are written to "<prefix>-stdout" and "<prefix>-stderr"
  inside results_dir. The stdout log starts with an "Args: ..." line and contains an
  "Iteration: N" line before each run. The executable's exit status is not inspected.
  """
  # Pick a prefix that clashes neither with a temporary dir in the current working directory
  # nor with a log file that already exists in results_dir (where the logs are created).
  while True:
    out_prefix = "output-" + get_random_suffix()
    existing_names = os.listdir() + os.listdir(results_dir)
    if not any(name.startswith(out_prefix) for name in existing_names): break
  with open(join(results_dir, out_prefix + "-stdout"), "w") as out, open(join(results_dir, out_prefix + "-stderr"), "w") as err:
    print("Args:", ' '.join(arg_set), file=out, flush=True)
    for i in range(cli_args.iterations):
      print("Iteration:", i + 1, file=out, flush=True)
      # Relative path handed to the executable via -O (presumably its output directory); it is
      # only needed while the run lasts.
      tempdir = out_prefix + "-#" + str(i)
      args = [ executable, "-O", tempdir ] + arg_set
      try:
        subprocess.run(args, stdout=out, stderr=err, text=True)
      finally:
        # Clean up even if launching the executable raised, so no stray dirs are left behind.
        shutil.rmtree(tempdir, ignore_errors=True)

# Loads the args. The first row of args.csv holds the column names, which are passed to the
# executable verbatim as arguments; every following row describes one argument set. The first
# column is skipped. A column's name is always added to the argument set, followed by the cell
# value only when the cell is not blank.
args = []
# newline='' is what the csv module documents for files handed to csv.reader.
with open(join(root_dir, "args.csv"), 'r', newline='') as f:
  csv_reader = csv.reader(f, delimiter=',')
  columns = next(csv_reader, [])
  for row in csv_reader:
    # csv.reader yields an empty list for a blank line (e.g. a trailing one); skip it instead
    # of failing with an IndexError below.
    if not row: continue
    arg_set = []
    for j, column in enumerate(columns):
      if j == 0: continue
      arg_set.append(column)
      if row[j].strip() != '':
        arg_set.append(row[j])
    arg_set += [ '--threads', str(cli_args.threads) ]
    # One-element tuples, because the argument sets are consumed by Pool.starmap.
    args.append((arg_set,))

# Run the simulations in a pool of worker processes.
# The working directory is switched first so that relative paths used by do_run resolve against root_dir.
os.chdir(root_dir)
print("Running simulations... ", end='', flush=True)
with Pool(processes=cli_args.processes) as pool:
  # Each entry of args is a one-element tuple, unpacked by starmap into do_run(arg_set).
  pool.starmap(do_run, args)
print("Done!")

print("Gathering results... ", end='', flush=True)
total_files = 0
valid_files = 0

# Save all data into the dictionary: column name -> list with one entry per reported accuracy
data = { 'Accuracy' : [], 'Lambda ESM' : [] }

# Iterate the results dir for output files
for dirpath, dirnames, filenames in os.walk(results_dir):
  for filename in filenames:
    # os.walk yields bare file names, so the name can be tested directly
    if not filename.endswith('stdout'): continue
    total_files += 1
    with open(os.path.join(dirpath, filename), 'r') as f:
      args_found = False
      accuracy_found = False
      for line in f:
        if line.startswith('Args: '):
          # Loads the arguments provided in the output file instead of assuming which ones exist.
          # Only the leading marker is cut off; str.replace would also remove any later
          # occurrence of "Args: " inside the argument list.
          args = multicell_rc_params.parse_args(line[len('Args: '):].split())
          args_found = True
        if line.startswith('Testing accuracy:'):
          assert args_found, "'Testing accuracy:' appears before any 'Args: ' line in " + filename
          # Load the arg values every time Testing accuracy is encountered for multiple iteration simulations
          for arg in vars(args):
            # Turn the attribute name into a column title, e.g. 'alpha_esm' -> 'Alpha ESM'
            argname = arg.replace('_', ' ').replace('esm', 'ESM')
            argname = argname[0].upper() + argname[1:]
            argval = getattr(args, arg)
            if argname in data:
              data[argname].append(argval)
            else:
              data[argname] = [ argval ]

          # Relies on the parsed args providing 'alpha_esm' and 'beta_esm' (the columns appended just above)
          lambda_ESM = math.sqrt(data['Beta ESM'][-1] / data['Alpha ESM'][-1])
          data['Lambda ESM'].append(lambda_ESM)

          # The accuracy value is the third whitespace-separated token of the line
          accuracy = float(line.split()[2])
          assert 0.0 <= accuracy <= 1.0, "accuracy out of range in " + filename
          data['Accuracy'].append(accuracy)
          accuracy_found = True

      # Keep track of how many simulations were successful
      if args_found and accuracy_found: valid_files += 1

# These are unnecessary, as lambda can substitute for both
data.pop('Alpha ESM', None)
data.pop('Beta ESM', None)

print("Done!")
# Guard against an empty results dir: without any stdout file the ratio would divide by zero.
valid_percent = valid_files / total_files * 100 if total_files else 0.0
print("Valid / total files: {} / {} ({}%)".format(valid_files, total_files, valid_percent))

# Save to CSV (results.csv next to this script); the DataFrame index is written as the first column
data = pd.DataFrame(data)
data.to_csv(join(root_dir, 'results.csv'))