Feature/pacemaker2022 #417
Merged

8 commits:
- df8e2bd: WIP: first draft of Pacemaker2022
- 53ba58c: WIP: elif self._job_dict: raise NotImplementedError()
- b89fe5d: Pacemaker job:
- 37f85d9: rename Pacemaker2022 to PacemakerJob
- 14e2ccc: add pacemaker_example.ipynb
- bc48b0b: removed log setters
- b05890f: Do not import from pyiron in example notebook
- c50a420: Remove example notebook (pmrv)
Empty file.
@@ -0,0 +1,388 @@
# coding: utf-8
# Copyright (c) ICAMS, Ruhr University Bochum, 2022

## Executable required: $pyiron/resources/pacemaker/bin/run_pacemaker_tf_cpu.sh AND run_pacemaker_tf.sh

import logging
import os
from shutil import copyfile
from typing import List

import numpy as np
import pandas as pd
import ruamel.yaml as yaml

from pyiron_base import GenericJob, GenericParameters, state, Executable, FlattenedStorage

from pyiron_contrib.atomistics.atomistics.job.trainingcontainer import TrainingStorage, TrainingContainer
from pyiron_contrib.atomistics.ml.potentialfit import PotentialFit

from pyiron_atomistics.atomistics.structure.atoms import Atoms as pyironAtoms, ase_to_pyiron
from ase.atoms import Atoms as aseAtoms

s = state.settings
class PacemakerJob(GenericJob, PotentialFit):

    def __init__(self, project, job_name):
        super().__init__(project, job_name)
        self.__name__ = "Pacemaker2022"
        self.__version__ = "0.2"

        self._train_job_id_list = []

        self.input = GenericParameters(table_name="input")
        self._cutoff = 7.0
        self.input["cutoff"] = self._cutoff
        self.input["metadata"] = {"comment": "pyiron-generated fitting job"}

        # data_config
        self.input["data"] = {}
        # potential_config
        self.input["potential"] = {
            "elements": [],
            "bonds": {
                "ALL": {
                    "radbase": "SBessel",
                    "rcut": self._cutoff,
                    "dcut": 0.01,
                    "radparameters": [5.25],
                }
            },
            "embeddings": {
                "ALL": {
                    "fs_parameters": [1, 1, 1, 0.5],
                    "ndensity": 2,
                    "npot": "FinnisSinclairShiftedScaled",
                }
            },
            "functions": {
                "ALL": {
                    "nradmax_by_orders": [15, 3, 2, 1],
                    "lmax_by_orders": [0, 3, 2, 1],
                }
            },
        }

        # fit_config
        self.input["fit"] = {
            "loss": {"L1_coeffs": 1e-8, "L2_coeffs": 1e-8, "kappa": 0.3,
                     "w0_rad": 0, "w1_rad": 0, "w2_rad": 0},
            "maxiter": 1000,
            "optimizer": "BFGS",
            "fit_cycles": 1,
        }
        # backend_config
        self.input["backend"] = {"batch_size": 100,
                                 "display_step": 50,
                                 "evaluator": "tensorpot"}

        self.structure_data = None
        self._executable = None
        self._executable_activate()

        state.publications.add(self.publication)

    @property
    def elements(self):
        return self.input["potential"].get("elements")

    @elements.setter
    def elements(self, val):
        self.input["potential"]["elements"] = val

    @property
    def cutoff(self):
        return self._cutoff

    @cutoff.setter
    def cutoff(self, val):
        self._cutoff = val
        self.input["cutoff"] = self._cutoff
        self.input["potential"]["bonds"]["ALL"]["rcut"] = self._cutoff

    @property
    def publication(self):
        return {
            "pacemaker": [
                {
                    "title": "Efficient parametrization of the atomic cluster expansion",
                    "journal": "Physical Review Materials",
                    "volume": "6",
                    "number": "1",
                    "year": "2022",
                    "doi": "10.1103/PhysRevMaterials.6.013804",
                    "url": "https://doi.org/10.1103/PhysRevMaterials.6.013804",
                    "author": ["Anton Bochkarev", "Yury Lysogorskiy", "Sarath Menon", "Minaam Qamar",
                               "Matous Mrovec", "Ralf Drautz"],
                },
                {
                    "title": "Performant implementation of the atomic cluster expansion (PACE) and application to copper and silicon",
                    "journal": "npj Computational Materials",
                    "volume": "7",
                    "number": "1",
                    "year": "2021",
                    "doi": "10.1038/s41524-021-00559-9",
                    "url": "https://doi.org/10.1038/s41524-021-00559-9",
                    "author": ["Yury Lysogorskiy", "Cas van der Oord", "Anton Bochkarev", "Sarath Menon",
                               "Matteo Rinaldi", "Thomas Hammerschmidt", "Matous Mrovec", "Aidan Thompson",
                               "Gábor Csányi", "Christoph Ortner", "Ralf Drautz"],
                },
                {
                    "title": "Atomic cluster expansion for accurate and transferable interatomic potentials",
                    "journal": "Physical Review B",
                    "volume": "99",
                    "year": "2019",
                    "doi": "10.1103/PhysRevB.99.014104",
                    "url": "https://doi.org/10.1103/PhysRevB.99.014104",
                    "author": ["Ralf Drautz"],
                },
            ]
        }

    def _save_structure_dataframe_pckl_gzip(self, df):
        if "NUMBER_OF_ATOMS" not in df.columns and "number_of_atoms" in df.columns:
            df.rename(columns={"number_of_atoms": "NUMBER_OF_ATOMS"}, inplace=True)
        df["NUMBER_OF_ATOMS"] = df["NUMBER_OF_ATOMS"].astype(int)

        # TODO: reference energy subtraction?
        if "energy_corrected" not in df.columns and "energy" in df.columns:
            df.rename(columns={"energy": "energy_corrected"}, inplace=True)

        if "atoms" in df.columns:
            # if the column holds pyiron Atoms, convert to ASE Atoms
            at = df.iloc[0]["atoms"]
            if isinstance(at, pyironAtoms):
                df["ase_atoms"] = df["atoms"].map(lambda s: s.to_ase())
                df.drop(columns=["atoms"], inplace=True)
            else:
                assert isinstance(at, aseAtoms), "'atoms' column is not a valid ASE Atoms object"
                # the rest of the code expects the column to be named "ase_atoms"
                df.rename(columns={"atoms": "ase_atoms"}, inplace=True)
        elif "ase_atoms" not in df.columns:
            raise ValueError("DataFrame should contain 'atoms' (pyiron Atoms) or 'ase_atoms' (ASE Atoms) columns")

        if "stress" in df.columns:
            df.drop(columns=["stress"], inplace=True)

        if "pbc" not in df.columns:
            df["pbc"] = df["ase_atoms"].map(lambda atoms: np.all(atoms.pbc))

        data_file_name = os.path.join(self.working_directory, "df_fit.pckl.gzip")
        logging.info("Saving training structures dataframe into {} with pickle protocol = 4, compression = gzip".format(
            data_file_name))
        df.to_pickle(data_file_name, compression="gzip", protocol=4)
        return data_file_name

    def write_input(self):
        # prepare datafile
        if self._train_job_id_list and self.structure_data is None:
            train_df = self.create_training_dataframe(self._train_job_id_list)
            self.structure_data = train_df

        if isinstance(self.structure_data, pd.DataFrame):
            logging.info("structure_data is a pandas.DataFrame")
            data_file_name = self._save_structure_dataframe_pckl_gzip(self.structure_data)
            self.input["data"] = {"filename": data_file_name}
            elements_set = set()
            for at in self.structure_data["ase_atoms"]:
                elements_set.update(at.get_chemical_symbols())
            elements = sorted(elements_set)
            print("Automatically determined list of elements: {}".format(elements))
            self.elements = elements
        elif isinstance(self.structure_data, str):  # filename
            if os.path.isfile(self.structure_data):
                logging.info("structure_data is a valid file path")
                self.input["data"] = {"filename": self.structure_data}
            else:
                raise ValueError("Provided structure_data filename ({}) doesn't exist".format(self.structure_data))
        elif hasattr(self.structure_data, "to_pandas"):  # duck-typing check for TrainingContainer
            logging.info("structure_data is a TrainingContainer")
            df = self.structure_data.to_pandas()
            data_file_name = self._save_structure_dataframe_pckl_gzip(df)
            self.input["data"] = {"filename": data_file_name}
        elif self.structure_data is None:
            raise ValueError(
                "`structure_data` is None, but should be a pd.DataFrame, a TrainingContainer "
                "or a valid pickle.gzip filename")

        metadata_dict = self.input["metadata"]
        metadata_dict["pyiron_job_id"] = str(self.job_id)

        input_yaml_dict = {
            "cutoff": self.input["cutoff"],
            "metadata": metadata_dict,
            "potential": self.input["potential"],
            "data": self.input["data"],
            "fit": self.input["fit"],
            "backend": self.input["backend"],
        }

        if isinstance(self.input["potential"], str):
            pot_file_name = self.input["potential"]
            if os.path.isfile(pot_file_name):
                logging.info("Input potential is a filename")
                pot_basename = os.path.basename(pot_file_name)
                copyfile(pot_file_name, os.path.join(self.working_directory, pot_basename))
                input_yaml_dict["potential"] = pot_basename
            else:
                raise ValueError("Provided potential filename ({}) doesn't exist".format(self.input["potential"]))

        with open(os.path.join(self.working_directory, "input.yaml"), "w") as f:
            yaml.dump(input_yaml_dict, f)

    def _analyse_log(self, logfile="metrics.txt"):
        metrics_filename = os.path.join(self.working_directory, logfile)

        metrics_df = pd.read_csv(metrics_filename, sep=r"\s+")
        res_dict = metrics_df.to_dict(orient="list")
        return res_dict

    def collect_output(self):
        final_potential_filename_yaml = self.get_final_potential_filename()
        with open(final_potential_filename_yaml, "r") as f:
            yaml_lines = f.readlines()
        final_potential_yaml_string = "".join(yaml_lines)

        with open(self.get_final_potential_filename_ace(), "r") as f:
            ace_lines = f.readlines()
        final_potential_yace_string = "".join(ace_lines)

        # parse the .yace once from the string already read, instead of re-opening the file
        yace_data = yaml.safe_load(final_potential_yace_string)

        elements_name = yace_data["elements"]

        with self.project_hdf5.open("output/potential") as h5out:
            h5out["yaml"] = final_potential_yaml_string
            h5out["yace"] = final_potential_yace_string
            h5out["elements_name"] = elements_name

        log_res_dict = self._analyse_log()

        with self.project_hdf5.open("output/log") as h5out:
            for key, arr in log_res_dict.items():
                h5out[key] = arr

        # training data
        training_data_fname = os.path.join(self.working_directory, "fitting_data_info.pckl.gzip")
        df = pd.read_pickle(training_data_fname, compression="gzip")
        df["atoms"] = df.ase_atoms.map(ase_to_pyiron)
        training_data_ts = TrainingStorage()
        for _, r in df.iterrows():
            training_data_ts.add_structure(r.atoms,
                                           energy=r.energy_corrected,
                                           forces=r.forces,
                                           identifier=r["name"])

        # predicted data
        predicted_fname = os.path.join(self.working_directory, "train_pred.pckl.gzip")
        df = pd.read_pickle(predicted_fname, compression="gzip")
        predicted_data_fs = FlattenedStorage()
        predicted_data_fs.add_array("energy", dtype=np.float64, shape=(), per="chunk")
        predicted_data_fs.add_array("energy_true", dtype=np.float64, shape=(), per="chunk")
        # np.int was removed in recent NumPy releases; use the builtin int instead
        predicted_data_fs.add_array("number_of_atoms", dtype=int, shape=(), per="chunk")
        predicted_data_fs.add_array("forces", dtype=np.float64, shape=(3,), per="element")
        predicted_data_fs.add_array("forces_true", dtype=np.float64, shape=(3,), per="element")
        for i, r in df.iterrows():
            identifier = r["name"] if "name" in r else str(i)
            predicted_data_fs.add_chunk(r["NUMBER_OF_ATOMS"], identifier=identifier,
                                        energy=r.energy_pred,
                                        forces=r.forces_pred,
                                        energy_true=r.energy_corrected,
                                        forces_true=r.forces,
                                        number_of_atoms=r.NUMBER_OF_ATOMS,
                                        energy_per_atom=r.energy_pred / r.NUMBER_OF_ATOMS,
                                        energy_per_atom_true=r.energy_corrected / r.NUMBER_OF_ATOMS,
                                        )

        with self.project_hdf5.open("output") as hdf5_output:
            training_data_ts.to_hdf(hdf=hdf5_output, group_name="training_data")
            predicted_data_fs.to_hdf(hdf=hdf5_output, group_name="predicted_data")

    def get_lammps_potential(self):
        elements_name = self["output/potential/elements_name"]
        elem = " ".join(elements_name)
        pot_file_name = self.get_final_potential_filename_ace()
        pot_dict = {
            "Config": [["pair_style pace\n", "pair_coeff * * {} {}\n".format(pot_file_name, elem)]],
            "Filename": [""],
            "Model": ["ACE"],
            "Name": [self.job_name],
            "Species": [elements_name],
        }

        ace_potential = pd.DataFrame(pot_dict)

        return ace_potential

    def to_hdf(self, hdf=None, group_name=None):
        super().to_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as h5in:
            self.input.to_hdf(h5in)

    def from_hdf(self, hdf=None, group_name=None):
        super().from_hdf(hdf=hdf, group_name=group_name)
        with self.project_hdf5.open("input") as h5in:
            self.input.from_hdf(h5in)

    def get_final_potential_filename(self):
        return os.path.join(self.working_directory, "output_potential.yaml")

    def get_final_potential_filename_ace(self):
        return os.path.join(self.working_directory, "output_potential.yace")

    def get_current_potential_filename(self):
        return os.path.join(self.working_directory, "interim_potential_0.yaml")

    # To link to the executable from the notebook
    def _executable_activate(self, enforce=False, codename="pacemaker"):
        if self._executable is None or enforce:
            self._executable = Executable(
                codename=codename, module="pacemaker", path_binary_codes=state.settings.resource_paths
            )

    def _add_training_data(self, container: TrainingContainer) -> None:
        self.add_job_to_fitting(container.id)

    def add_job_to_fitting(self, job_id, *args, **kwargs):
        self._train_job_id_list.append(job_id)

    def _get_training_data(self) -> TrainingStorage:
        return self["output/training_data"].to_object()

    def _get_predicted_data(self) -> FlattenedStorage:
        return self["output/predicted_data"].to_object()

    # copied/adapted from mlip.py
    def create_training_dataframe(self, _train_job_id_list: List = None) -> pd.DataFrame:
        if _train_job_id_list is None:
            _train_job_id_list = self._train_job_id_list
        df_list = []
        for job_id in _train_job_id_list:
            ham = self.project.inspect(job_id)
            if ham.__name__ == "TrainingContainer":
                job = ham.to_object()
                data_df = job.to_pandas()
                df_list.append(data_df)
            else:
                raise NotImplementedError("Currently only TrainingContainer is supported")

        total_training_df = pd.concat(df_list, axis=0)
        total_training_df.reset_index(drop=True, inplace=True)

        return total_training_df
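
For orientation, here is a minimal usage sketch of the new job class (not part of the diff; the import path, project name, and the pre-existing TrainingContainer job are hypothetical, and a pacemaker executable is assumed to be set up under pyiron's resource paths as noted at the top of the file):

from pyiron_atomistics import Project
from pyiron_contrib.atomistics.pacemaker.job import PacemakerJob  # hypothetical import path

pr = Project("pacemaker_demo")  # hypothetical project

# a previously created TrainingContainer holding structures with energies and forces
tc = pr.load("training_data")  # hypothetical job name

job = pr.create_job(PacemakerJob, "ace_fit")
job.add_training_data(tc)  # public wrapper from the PotentialFit interface around _add_training_data
job.cutoff = 7.0  # the setter updates input["cutoff"] and the "ALL" bond rcut together
job.input["fit"]["maxiter"] = 500  # tune fit settings before running
job.run()

# after the fit, hand the potential to a pyiron LAMMPS job
pot = job.get_lammps_potential()

Note that write_input() determines the element list automatically from the training structures, so elements only needs to be set by hand when structure_data is passed as a filename.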
Review comment: Should use DataContainer, but I can also do that once this is merged.
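
For reference, a rough sketch of what that suggestion might look like (my reading of the reviewer's intent, not code from this PR; DataContainer is the pyiron_base input class that wraps nested dicts and persists them to HDF5 directly):

from pyiron_base import DataContainer

# in place of GenericParameters(table_name="input") in PacemakerJob.__init__:
inp = DataContainer(table_name="input")
inp["cutoff"] = 7.0
inp["potential"] = {
    "elements": [],
    "bonds": {"ALL": {"radbase": "SBessel", "rcut": 7.0}},
}
# nested groups are wrapped recursively and can be addressed by slash-separated paths
print(inp["potential/bonds/ALL/rcut"])  # -> 7.0

Since DataContainer exposes to_hdf()/from_hdf() with the same signature, the job's to_hdf/from_hdf overrides above should work unchanged.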