diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json
new file mode 100644
index 0000000..7b58ad1
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/env_config.json
similarity index 100%
rename from env_acrobot_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/env_config.json
diff --git a/env_cheetah_run/DynaSAC_Bounded_01_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/train_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_01_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_10/train_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json
new file mode 100644
index 0000000..49ae876
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_cheetah_run/DynaSAC_Bounded_03_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/train_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_03_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_2/train_config.json
diff --git a/env_cheetah_run/DynaSAC_Bounded_01_5/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/alg_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_01_5/alg_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/alg_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_cheetah_run/DynaSAC_Bounded_05_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/train_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_05_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_01_5/train_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json
new file mode 100644
index 0000000..4d713e1
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_cheetah_run/DynaSAC_Bounded_07_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/train_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_07_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_10/train_config.json
diff --git a/env_humanoid_run/DynaSAC_Bounded_03_2/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/alg_config.json
similarity index 100%
rename from env_humanoid_run/DynaSAC_Bounded_03_2/alg_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/alg_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_cheetah_run/DynaSAC_Bounded_09_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/train_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_09_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_2/train_config.json
diff --git a/env_cheetah_run/DynaSAC_Bounded_03_5/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/alg_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_03_5/alg_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/alg_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_cheetah_run/Dyna_SAC_1_10/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/train_config.json
similarity index 100%
rename from env_cheetah_run/Dyna_SAC_1_10/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_03_5/train_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json
new file mode 100644
index 0000000..4889c3b
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_01_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/train_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_01_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_10/train_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json
new file mode 100644
index 0000000..2f569f9
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_03_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/train_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_03_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_2/train_config.json
diff --git a/env_cheetah_run/DynaSAC_Bounded_05_5/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/alg_config.json
similarity index 100%
rename from env_cheetah_run/DynaSAC_Bounded_05_5/alg_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/alg_config.json
diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_05_5/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/train_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_05_5/train_config.json
rename to env_acrobot_swingup/Dyna/DynaSAC_Bounded_05_5/train_config.json
diff --git a/env_cheetah_run/Dyna_SAC_1_10/alg_config.json b/env_acrobot_swingup/Dyna/Dyna_SAC_1_10/alg_config.json
similarity index 100%
rename from env_cheetah_run/Dyna_SAC_1_10/alg_config.json
rename to env_acrobot_swingup/Dyna/Dyna_SAC_1_10/alg_config.json
diff --git a/env_acrobot_swingup/Dyna/Dyna_SAC_1_10/env_config.json b/env_acrobot_swingup/Dyna/Dyna_SAC_1_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/Dyna/Dyna_SAC_1_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_07_5/train_config.json b/env_acrobot_swingup/Dyna/Dyna_SAC_1_10/train_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_07_5/train_config.json
rename to env_acrobot_swingup/Dyna/Dyna_SAC_1_10/train_config.json
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3/alg_config.json
new file mode 100644
index 0000000..d862a79
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3/alg_config.json
@@ -0,0 +1,32 @@
+{
+  "algorithm": "STEVESAC",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "num_rwd_models": 5,
+
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/alg_config.json
new file mode 100644
index 0000000..31deb18
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/alg_config.json
new file mode 100644
index 0000000..f0addf2
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/alg_config.json
new file mode 100644
index 0000000..4cda484
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_01_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/alg_config.json
new file mode 100644
index 0000000..dcea6a2
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/alg_config.json
new file mode 100644
index 0000000..10e8516
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/alg_config.json
new file mode 100644
index 0000000..4ec6ba4
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_03_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/alg_config.json
new file mode 100644
index 0000000..b7b4593
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/alg_config.json
new file mode 100644
index 0000000..017e66b
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/alg_config.json
new file mode 100644
index 0000000..045ce1f
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/env_config.json
new file mode 100644
index 0000000..ce60721
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "acrobot"
+}
diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_acrobot_swingup/STEVE/STEVESAC_3_05_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json
new file mode 100644
index 0000000..7b58ad1
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/env_config.json
similarity index 100%
rename from env_cartpole_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/env_config.json
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_09_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/train_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_09_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_10/train_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json
new file mode 100644
index 0000000..49ae876
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_finger_turn_hard/Dyna_SAC_1_10/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/train_config.json
similarity index 100%
rename from env_finger_turn_hard/Dyna_SAC_1_10/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_2/train_config.json
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_01_5/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/alg_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_01_5/alg_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/alg_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/DynaSAC_Bounded_01_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/train_config.json
similarity index 100%
rename from env_fish_swim/DynaSAC_Bounded_01_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_01_5/train_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json
new file mode 100644
index 0000000..4d713e1
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/DynaSAC_Bounded_03_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/train_config.json
similarity index 100%
rename from env_fish_swim/DynaSAC_Bounded_03_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_10/train_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/alg_config.json
new file mode 100644
index 0000000..66575fa
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/DynaSAC_Bounded_05_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/train_config.json
similarity index 100%
rename from env_fish_swim/DynaSAC_Bounded_05_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_2/train_config.json
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_03_5/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/alg_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_03_5/alg_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/alg_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/DynaSAC_Bounded_07_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/train_config.json
similarity index 100%
rename from env_fish_swim/DynaSAC_Bounded_07_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_03_5/train_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json
new file mode 100644
index 0000000..4889c3b
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/DynaSAC_Bounded_09_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/train_config.json
similarity index 100%
rename from env_fish_swim/DynaSAC_Bounded_09_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_10/train_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json
new file mode 100644
index 0000000..2f569f9
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_fish_swim/Dyna_SAC_1_10/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/train_config.json
similarity index 100%
rename from env_fish_swim/Dyna_SAC_1_10/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_2/train_config.json
diff --git a/env_finger_turn_hard/DynaSAC_Bounded_05_5/alg_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/alg_config.json
similarity index 100%
rename from env_finger_turn_hard/DynaSAC_Bounded_05_5/alg_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/alg_config.json
diff --git a/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_hopper_hop/DynaSAC_Bounded_01_5/train_config.json b/env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/train_config.json
similarity index 100%
rename from env_hopper_hop/DynaSAC_Bounded_01_5/train_config.json
rename to env_cartpole_swingup/Dyna/DynaSAC_Bounded_05_5/train_config.json
diff --git a/env_finger_turn_hard/Dyna_SAC_1_10/alg_config.json b/env_cartpole_swingup/Dyna/Dyna_SAC_1_10/alg_config.json
similarity index 100%
rename from env_finger_turn_hard/Dyna_SAC_1_10/alg_config.json
rename to env_cartpole_swingup/Dyna/Dyna_SAC_1_10/alg_config.json
diff --git a/env_cartpole_swingup/Dyna/Dyna_SAC_1_10/env_config.json b/env_cartpole_swingup/Dyna/Dyna_SAC_1_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/Dyna/Dyna_SAC_1_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_hopper_hop/DynaSAC_Bounded_03_5/train_config.json b/env_cartpole_swingup/Dyna/Dyna_SAC_1_10/train_config.json
similarity index 100%
rename from env_hopper_hop/DynaSAC_Bounded_03_5/train_config.json
rename to env_cartpole_swingup/Dyna/Dyna_SAC_1_10/train_config.json
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3/alg_config.json
new file mode 100644
index 0000000..d862a79
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3/alg_config.json
@@ -0,0 +1,32 @@
+{
+  "algorithm": "STEVESAC",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "num_rwd_models": 5,
+
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/alg_config.json
new file mode 100644
index 0000000..31deb18
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/alg_config.json
new file mode 100644
index 0000000..f0addf2
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/alg_config.json
new file mode 100644
index 0000000..4cda484
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_01_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/alg_config.json
new file mode 100644
index 0000000..dcea6a2
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/alg_config.json
new file mode 100644
index 0000000..10e8516
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/alg_config.json
new file mode 100644
index 0000000..4ec6ba4
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.3,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_03_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/alg_config.json
new file mode 100644
index 0000000..b7b4593
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_10/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/alg_config.json
new file mode 100644
index 0000000..017e66b
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 2
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_2/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/alg_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/alg_config.json
new file mode 100644
index 0000000..045ce1f
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/alg_config.json
@@ -0,0 +1,34 @@
+{
+  "algorithm": "STEVESAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "num_rwd_models": 5,
+
+  "world_model_lr": 0.001,
+  "horizon": 3,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.5,
+  "exploration_sample": 5
+}
\ No newline at end of file
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/env_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/env_config.json
new file mode 100644
index 0000000..14f64ee
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/env_config.json
@@ -0,0 +1,5 @@
+{
+  "gym": "dmcs",
+  "task": "swingup",
+  "domain": "cartpole"
+}
diff --git a/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/train_config.json b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/train_config.json
new file mode 100644
index 0000000..07ba116
--- /dev/null
+++ b/env_cartpole_swingup/STEVE/STEVESAC_3_05_5/train_config.json
@@ -0,0 +1,5 @@
+{
+  "seeds": [15,25,35,45,55],
+  "number_steps_per_evaluation": 10000,
+  "number_eval_episodes": 10
+}
diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json
new file mode 100644
index 0000000..7b58ad1
--- /dev/null
+++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json
@@ -0,0 +1,33 @@
+{
+  "algorithm": "DynaSAC_Bounded",
+  "type": "mbrl",
+  "G": 1,
+  "G_model": 5.0,
+  "batch_size": 256,
+  "buffer_size": 1000000,
+  "max_steps_exploration": 256,
+  "max_steps_training": 1000000,
+  "number_steps_per_train_policy": 1,
+
+  "reward_scale": 1.0,
+  "actor_lr": 3e-4,
+  "critic_lr": 3e-4,
+  "alpha_lr": 3e-4,
+  "gamma": 0.99,
+  "tau": 0.005,
+
+  "min_noise": 0.0,
+  "noise_scale": 0.1,
+  "noise_decay": 1.0,
+
+  "num_models": 6,
+  "world_model_lr": 0.001,
+  "horizon": 1,
+  "num_samples": 10,
+  "sas": false,
+  "train_reward": true,
+  "train_both": false,
+  "gripper": false,
+  "threshold": 0.1,
+  "exploration_sample": 10
+}
\ No newline at end of file
diff --git a/STEVESAC/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/env_config.json
similarity index 100%
rename from STEVESAC/env_config.json
rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/env_config.json
diff --git a/env_hopper_hop/DynaSAC_Bounded_05_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/train_config.json
similarity index 100%
rename from env_hopper_hop/DynaSAC_Bounded_05_5/train_config.json
rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_10/train_config.json
a/env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/STEVESAC_Bounded/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from STEVESAC_Bounded/env_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_hopper_hop/DynaSAC_Bounded_07_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/train_config.json similarity index 100% rename from env_hopper_hop/DynaSAC_Bounded_07_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_2/train_config.json diff --git a/env_fish_swim/DynaSAC_Bounded_01_5/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 100% rename from env_fish_swim/DynaSAC_Bounded_01_5/alg_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/alg_config.json diff --git a/env_cheetah_run/Dyna_SAC_1_10/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/env_config.json similarity index 100% rename from env_cheetah_run/Dyna_SAC_1_10/env_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/env_config.json diff --git a/env_hopper_hop/DynaSAC_Bounded_09_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/train_config.json similarity index 100% rename from env_hopper_hop/DynaSAC_Bounded_09_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_01_5/train_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/sample_Dyna_SAC/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/env_config.json similarity index 100% rename from sample_Dyna_SAC/env_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/env_config.json diff --git a/env_hopper_hop/Dyna_SAC_1_10/train_config.json 
b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/train_config.json similarity index 100% rename from env_hopper_hop/Dyna_SAC_1_10/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_10/train_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/env_config.json similarity index 100% rename from env_cheetah_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/env_config.json diff --git a/env_humanoid_run/DynaSAC_Bounded_01_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_01_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_2/train_config.json diff --git a/env_fish_swim/DynaSAC_Bounded_03_5/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 100% rename from env_fish_swim/DynaSAC_Bounded_03_5/alg_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/alg_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_humanoid_run/DynaSAC_Bounded_03_2/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_03_2/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_03_5/train_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git 
a/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_humanoid_run/DynaSAC_Bounded_03_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_03_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_05_10/train_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_humanoid_run/DynaSAC_Bounded_05_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_05_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_05_2/train_config.json diff --git a/env_fish_swim/DynaSAC_Bounded_05_5/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 100% rename from env_fish_swim/DynaSAC_Bounded_05_5/alg_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/alg_config.json diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_humanoid_run/DynaSAC_Bounded_07_5/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_07_5/train_config.json rename to env_cheetah_run/Dyna/DynaSAC_Bounded_05_5/train_config.json diff --git a/env_fish_swim/Dyna_SAC_1_10/alg_config.json b/env_cheetah_run/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from env_fish_swim/Dyna_SAC_1_10/alg_config.json rename to env_cheetah_run/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_cheetah_run/Dyna/Dyna_SAC_1_10/env_config.json b/env_cheetah_run/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/Dyna_SAC_1_10/env_config.json @@ 
-0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_humanoid_run/DynaSAC_Bounded_09_5/train_config.json b/env_cheetah_run/Dyna/Dyna_SAC_1_10/train_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_09_5/train_config.json rename to env_cheetah_run/Dyna/Dyna_SAC_1_10/train_config.json diff --git a/env_cheetah_run/DynaSAC_Bounded_01_5/env_config.json b/env_cheetah_run/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index bba6d4c..0000000 --- a/env_cheetah_run/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "cheetah", - "task": "run" -} diff --git a/env_cheetah_run/DynaSAC_Bounded_03_5/env_config.json b/env_cheetah_run/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index bba6d4c..0000000 --- a/env_cheetah_run/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "cheetah", - "task": "run" -} diff --git a/env_cheetah_run/DynaSAC_Bounded_05_5/env_config.json b/env_cheetah_run/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index bba6d4c..0000000 --- a/env_cheetah_run/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "cheetah", - "task": "run" -} diff --git a/env_cheetah_run/DynaSAC_Bounded_07_5/env_config.json b/env_cheetah_run/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index bba6d4c..0000000 --- a/env_cheetah_run/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "cheetah", - "task": "run" -} diff --git a/env_cheetah_run/DynaSAC_Bounded_09_5/env_config.json b/env_cheetah_run/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index bba6d4c..0000000 --- a/env_cheetah_run/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "cheetah", - "task": "run" -} diff --git a/env_cheetah_run/STEVE/STEVESAC_3/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_10/alg_config.json 
b/env_cheetah_run/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_10/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_10/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_2/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_2/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_2/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_5/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, 
+ "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_5/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_01_5/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_10/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_10/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_10/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_2/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + 
"num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_2/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_2/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_5/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_5/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_03_5/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_10/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_10/env_config.json 
b/env_cheetah_run/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_10/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_2/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_2/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_2/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_5/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_5/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_05_5/train_config.json 
b/env_cheetah_run/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna_SAC_1_10/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from env_finger_turn_hard/Dyna_SAC_1_10/env_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_humanoid_run/Dyna_SAC_1_10/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/train_config.json similarity index 100% rename from env_humanoid_run/Dyna_SAC_1_10/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_10/train_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_finger_turn_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_reacher_hard/DynaSAC_Bounded_01_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/train_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_01_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_2/train_config.json diff --git a/env_hopper_hop/DynaSAC_Bounded_01_5/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 100% rename from env_hopper_hop/DynaSAC_Bounded_01_5/alg_config.json rename to 
env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/alg_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_reacher_hard/DynaSAC_Bounded_03_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/train_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_03_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_01_5/train_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_reacher_hard/DynaSAC_Bounded_05_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/train_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_05_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_10/train_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 
@@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_reacher_hard/DynaSAC_Bounded_07_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/train_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_07_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_2/train_config.json diff --git a/env_hopper_hop/DynaSAC_Bounded_03_5/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 100% rename from env_hopper_hop/DynaSAC_Bounded_03_5/alg_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/alg_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_reacher_hard/DynaSAC_Bounded_09_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/train_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_09_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_03_5/train_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_reacher_hard/Dyna_SAC_1_10/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/train_config.json similarity index 100% rename from env_reacher_hard/Dyna_SAC_1_10/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_10/train_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + 
"min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_walker_walk/DynaSAC_Bounded_01_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/train_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_01_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_2/train_config.json diff --git a/env_hopper_hop/DynaSAC_Bounded_05_5/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 100% rename from env_hopper_hop/DynaSAC_Bounded_05_5/alg_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/alg_config.json diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_walker_walk/DynaSAC_Bounded_03_5/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/train_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_03_5/train_config.json rename to env_finger_turn_hard/Dyna/DynaSAC_Bounded_05_5/train_config.json diff --git a/env_hopper_hop/Dyna_SAC_1_10/alg_config.json b/env_finger_turn_hard/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from env_hopper_hop/Dyna_SAC_1_10/alg_config.json rename to env_finger_turn_hard/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_finger_turn_hard/Dyna/Dyna_SAC_1_10/env_config.json b/env_finger_turn_hard/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_walker_walk/DynaSAC_Bounded_05_5/train_config.json b/env_finger_turn_hard/Dyna/Dyna_SAC_1_10/train_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_05_5/train_config.json rename to env_finger_turn_hard/Dyna/Dyna_SAC_1_10/train_config.json diff --git a/env_finger_turn_hard/DynaSAC_Bounded_01_5/env_config.json b/env_finger_turn_hard/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index 04eb0fb..0000000 --- a/env_finger_turn_hard/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "finger", - "task": "turn_hard" -} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_03_5/env_config.json b/env_finger_turn_hard/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index 04eb0fb..0000000 --- a/env_finger_turn_hard/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "finger", - "task": "turn_hard" -} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_05_5/env_config.json 
b/env_finger_turn_hard/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index 04eb0fb..0000000 --- a/env_finger_turn_hard/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "finger", - "task": "turn_hard" -} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_07_5/env_config.json b/env_finger_turn_hard/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index 04eb0fb..0000000 --- a/env_finger_turn_hard/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "finger", - "task": "turn_hard" -} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_09_5/env_config.json b/env_finger_turn_hard/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index 04eb0fb..0000000 --- a/env_finger_turn_hard/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "finger", - "task": "turn_hard" -} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/env_config.json 
b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + 
"task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 
0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} 
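
Note on the layout this patch establishes: every experiment directory carries the same triplet of files — alg_config.json (algorithm hyperparameters, including the swept "threshold" and "exploration_sample" values encoded in directory suffixes such as _05_10 for threshold 0.5 and exploration_sample 10), env_config.json (the DMCS "domain"/"task" pair), and train_config.json (evaluation seeds and cadence). The sketch below shows one way such a triplet could be loaded and expanded into per-seed runs; the directory path and JSON keys are taken from this diff, but load_experiment and the printed run spec are hypothetical illustrations, not code from this repository.

import json
from pathlib import Path

def load_experiment(exp_dir: str) -> dict:
    """Merge the three per-experiment JSON files into one run spec.

    Hypothetical helper: the file names mirror this patch's layout,
    but no such loader is defined in the repository itself.
    """
    root = Path(exp_dir)
    return {
        name: json.loads((root / f"{name}.json").read_text())
        for name in ("alg_config", "env_config", "train_config")
    }

# Example driver (also hypothetical): one run per seed in train_config.json.
spec = load_experiment("env_finger_turn_hard/STEVE/STEVESAC_3_05_10")
alg, env, train = spec["alg_config"], spec["env_config"], spec["train_config"]
for seed in train["seeds"]:  # [15, 25, 35, 45, 55] in these configs
    print(f"{alg['algorithm']} | {env['domain']}/{env['task']} | seed={seed} | "
          f"threshold={alg.get('threshold')} | sample={alg.get('exploration_sample')}")

Read this way, the grid of directories above is simply the cross product of threshold in {0.1, 0.3, 0.5} and exploration_sample in {2, 5, 10}, with the unbounded STEVESAC_3 and Dyna_SAC_1_10 directories serving as baselines that omit those two keys.
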
diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ 
b/env_fish_swim/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna_SAC_1_10/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from env_fish_swim/Dyna_SAC_1_10/env_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_walker_walk/DynaSAC_Bounded_07_5/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_10/train_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_07_5/train_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_01_10/train_config.json diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_fish_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_walker_walk/DynaSAC_Bounded_09_5/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_2/train_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_09_5/train_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_01_2/train_config.json diff --git a/env_humanoid_run/DynaSAC_Bounded_01_5/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_01_5/alg_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_01_5/alg_config.json diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_walker_walk/Dyna_SAC_1_10/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_01_5/train_config.json similarity index 100% rename from env_walker_walk/Dyna_SAC_1_10/train_config.json rename to 
env_fish_swim/Dyna/DynaSAC_Bounded_01_5/train_config.json diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/DynaSAC_Bounded_03_5/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_03_5/alg_config.json rename to 
env_fish_swim/Dyna/DynaSAC_Bounded_03_5/alg_config.json diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ 
b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/DynaSAC_Bounded_05_5/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 100% rename from env_humanoid_run/DynaSAC_Bounded_05_5/alg_config.json rename to env_fish_swim/Dyna/DynaSAC_Bounded_05_5/alg_config.json diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna_SAC_1_10/alg_config.json b/env_fish_swim/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from env_humanoid_run/Dyna_SAC_1_10/alg_config.json rename to env_fish_swim/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_fish_swim/Dyna/Dyna_SAC_1_10/env_config.json b/env_fish_swim/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/Dyna_SAC_1_10/train_config.json b/env_fish_swim/Dyna/Dyna_SAC_1_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/Dyna_SAC_1_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/DynaSAC_Bounded_01_5/env_config.json b/env_fish_swim/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index 1e588dc..0000000 --- a/env_fish_swim/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "fish", - "task": "swim" -} diff --git a/env_fish_swim/DynaSAC_Bounded_03_5/env_config.json b/env_fish_swim/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index 1e588dc..0000000 --- a/env_fish_swim/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "fish", - "task": "swim" -} diff --git a/env_fish_swim/DynaSAC_Bounded_05_5/env_config.json b/env_fish_swim/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index 1e588dc..0000000 --- a/env_fish_swim/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "fish", - "task": "swim" -} diff --git a/env_fish_swim/DynaSAC_Bounded_07_5/alg_config.json b/env_fish_swim/DynaSAC_Bounded_07_5/alg_config.json deleted file mode 100644 index e8c9014..0000000 --- 
a/env_fish_swim/DynaSAC_Bounded_07_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.7, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_fish_swim/DynaSAC_Bounded_07_5/env_config.json b/env_fish_swim/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index 9b127b1..0000000 --- a/env_fish_swim/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "gym": "dmcs", - "domain": "fish", - "task": "swim" -} - diff --git a/env_fish_swim/DynaSAC_Bounded_09_5/alg_config.json b/env_fish_swim/DynaSAC_Bounded_09_5/alg_config.json deleted file mode 100644 index 4923f37..0000000 --- a/env_fish_swim/DynaSAC_Bounded_09_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.9, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_fish_swim/DynaSAC_Bounded_09_5/env_config.json b/env_fish_swim/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index 1e588dc..0000000 --- a/env_fish_swim/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "fish", - "task": "swim" -} diff --git a/env_fish_swim/STEVE/STEVESAC_3/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3/env_config.json b/env_fish_swim/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3/train_config.json b/env_fish_swim/STEVE/STEVESAC_3/train_config.json new 
file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_10/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_10/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_10/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_2/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_2/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_2/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_5/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_5/alg_config.json new file 
mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_5/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_01_5/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_10/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_10/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_10/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_2/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + 
+ "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_2/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_2/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_5/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_5/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_03_5/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_10/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + 
"threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_10/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_10/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_2/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_2/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_2/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_5/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_5/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", 
+ "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_05_5/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/DynaSAC_Bounded/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from DynaSAC_Bounded/env_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna_SAC_1_10/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_hopper_hop/Dyna_SAC_1_10/env_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git 
a/env_reacher_hard/DynaSAC_Bounded_01_5/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_01_5/alg_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/alg_config.json diff --git a/env_hopper_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/env_config.json similarity index 100% rename from env_hopper_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/env_config.json diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/env_config.json 
b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/DynaSAC_Bounded_03_5/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_03_5/alg_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/alg_config.json diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + 
"algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/DynaSAC_Bounded_05_5/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 100% rename from env_reacher_hard/DynaSAC_Bounded_05_5/alg_config.json rename to env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/alg_config.json diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna_SAC_1_10/alg_config.json b/env_hopper_hop/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from env_reacher_hard/Dyna_SAC_1_10/alg_config.json rename to env_hopper_hop/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_hopper_hop/Dyna/Dyna_SAC_1_10/env_config.json b/env_hopper_hop/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/Dyna_SAC_1_10/train_config.json b/env_hopper_hop/Dyna/Dyna_SAC_1_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/Dyna_SAC_1_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/DynaSAC_Bounded_01_5/env_config.json b/env_hopper_hop/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index 07f6b79..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - 
"domain": "hopper", - "task": "hop" -} diff --git a/env_hopper_hop/DynaSAC_Bounded_03_5/env_config.json b/env_hopper_hop/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index 07f6b79..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "hopper", - "task": "hop" -} diff --git a/env_hopper_hop/DynaSAC_Bounded_05_5/env_config.json b/env_hopper_hop/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index 07f6b79..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "hopper", - "task": "hop" -} diff --git a/env_hopper_hop/DynaSAC_Bounded_07_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_07_5/alg_config.json deleted file mode 100644 index e8c9014..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_07_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.7, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_hopper_hop/DynaSAC_Bounded_07_5/env_config.json b/env_hopper_hop/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index 07f6b79..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "hopper", - "task": "hop" -} diff --git a/env_hopper_hop/DynaSAC_Bounded_09_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_09_5/alg_config.json deleted file mode 100644 index 4923f37..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_09_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.9, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_hopper_hop/DynaSAC_Bounded_09_5/env_config.json b/env_hopper_hop/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index 07f6b79..0000000 --- a/env_hopper_hop/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "hopper", - "task": "hop" -} diff --git a/env_hopper_hop/STEVE/STEVESAC_3/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + 
"max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_10/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_10/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_10/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_2/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + 
"gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_2/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_2/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_5/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_5/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_01_5/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_10/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_10/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 
+1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_10/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_2/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_2/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_2/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_5/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_5/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_03_5/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": 
[15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_10/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_10/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_10/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_2/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_2/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_2/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_5/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 
+1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_5/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_05_5/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna_SAC_1_10/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from env_humanoid_run/Dyna_SAC_1_10/env_config.json rename to env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + 
"gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_humanoid_config.json rename to env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/DynaSAC_Bounded_01_5/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 99% rename from env_walker_walk/DynaSAC_Bounded_01_5/alg_config.json rename to env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/alg_config.json index 43ed0c9..4133617 100644 --- a/env_walker_walk/DynaSAC_Bounded_01_5/alg_config.json +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/alg_config.json @@ -24,7 +24,6 @@ "world_model_lr": 0.001, "horizon": 1, "num_samples": 10, - "sas": false, "train_reward": true, "train_both": false, diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git 
a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/DynaSAC_Bounded_03_5/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_03_5/alg_config.json rename to env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/alg_config.json diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + 
"reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/DynaSAC_Bounded_05_5/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 100% rename from env_walker_walk/DynaSAC_Bounded_05_5/alg_config.json rename to env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/alg_config.json diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/train_config.json 
b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna_SAC_1_10/alg_config.json b/env_humanoid_run/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from env_walker_walk/Dyna_SAC_1_10/alg_config.json rename to env_humanoid_run/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_humanoid_run/Dyna/Dyna_SAC_1_10/env_config.json b/env_humanoid_run/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/Dyna_SAC_1_10/train_config.json b/env_humanoid_run/Dyna/Dyna_SAC_1_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/Dyna_SAC_1_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/DynaSAC_Bounded_01_5/env_config.json b/env_humanoid_run/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index 3f17a1c..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} diff --git a/env_humanoid_run/DynaSAC_Bounded_03_2/env_config.json b/env_humanoid_run/DynaSAC_Bounded_03_2/env_config.json deleted file mode 100644 index 3f17a1c..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_03_2/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} diff --git a/env_humanoid_run/DynaSAC_Bounded_03_5/env_config.json b/env_humanoid_run/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index 3f17a1c..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} diff --git a/env_humanoid_run/DynaSAC_Bounded_05_5/env_config.json b/env_humanoid_run/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index fcee853..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} - diff --git a/env_humanoid_run/DynaSAC_Bounded_07_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_07_5/alg_config.json deleted file mode 100644 index e8c9014..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_07_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.7, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_humanoid_run/DynaSAC_Bounded_07_5/env_config.json 
b/env_humanoid_run/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index 3f17a1c..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} diff --git a/env_humanoid_run/DynaSAC_Bounded_09_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_09_5/alg_config.json deleted file mode 100644 index 4923f37..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_09_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.9, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_humanoid_run/DynaSAC_Bounded_09_5/env_config.json b/env_humanoid_run/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index 3f17a1c..0000000 --- a/env_humanoid_run/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "humanoid", - "task": "run" -} diff --git a/env_humanoid_run/STEVE/STEVESAC_3/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_10/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + 
"actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_10/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_10/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_2/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_2/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_2/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_5/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": 
false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_5/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_01_5/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_10/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_10/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_10/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_2/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_2/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ 
b/env_humanoid_run/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_2/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_5/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_5/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_03_5/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_10/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_10/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_10/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 
0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_2/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_2/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_2/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_5/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_5/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_05_5/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json 
b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna_SAC_1_10/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from env_reacher_hard/Dyna_SAC_1_10/env_config.json rename to env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_reacher_config.json rename to env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_07_5/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 96% rename from env_finger_turn_hard/DynaSAC_Bounded_07_5/alg_config.json rename to env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/alg_config.json index e8c9014..4133617 100644 --- a/env_finger_turn_hard/DynaSAC_Bounded_07_5/alg_config.json +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/alg_config.json @@ -28,6 +28,6 @@ 
"train_reward": true, "train_both": false, "gripper": false, - "threshold": 0.7, + "threshold": 0.1, "exploration_sample": 5 } \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json 
b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/DynaSAC_Bounded_09_5/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/alg_config.json similarity index 96% rename from env_cheetah_run/DynaSAC_Bounded_09_5/alg_config.json rename to env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/alg_config.json index 4923f37..f534721 100644 --- a/env_cheetah_run/DynaSAC_Bounded_09_5/alg_config.json +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/alg_config.json @@ -28,6 +28,6 @@ "train_reward": true, "train_both": false, "gripper": false, - "threshold": 0.9, + "threshold": 0.3, "exploration_sample": 5 } \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": 
[15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/DynaSAC_Bounded_09_5/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/alg_config.json similarity index 96% rename from env_finger_turn_hard/DynaSAC_Bounded_09_5/alg_config.json rename to env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/alg_config.json index 4923f37..60b0894 100644 --- a/env_finger_turn_hard/DynaSAC_Bounded_09_5/alg_config.json +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/alg_config.json @@ -28,6 +28,6 @@ "train_reward": true, "train_both": false, "gripper": false, - "threshold": 0.9, + "threshold": 0.5, "exploration_sample": 5 } \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/sample_Dyna_SAC/alg_config.json b/env_reacher_hard/Dyna/Dyna_SAC_1_10/alg_config.json similarity index 100% rename from sample_Dyna_SAC/alg_config.json rename to env_reacher_hard/Dyna/Dyna_SAC_1_10/alg_config.json diff --git a/env_reacher_hard/Dyna/Dyna_SAC_1_10/env_config.json b/env_reacher_hard/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ 
b/env_reacher_hard/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/Dyna_SAC_1_10/train_config.json b/env_reacher_hard/Dyna/Dyna_SAC_1_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/Dyna_SAC_1_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/DynaSAC_Bounded_01_5/env_config.json b/env_reacher_hard/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index 7109cfa..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "reacher", - "task": "hard" -} diff --git a/env_reacher_hard/DynaSAC_Bounded_03_5/env_config.json b/env_reacher_hard/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index 7109cfa..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "reacher", - "task": "hard" -} diff --git a/env_reacher_hard/DynaSAC_Bounded_05_5/env_config.json b/env_reacher_hard/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index 7109cfa..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "reacher", - "task": "hard" -} diff --git a/env_reacher_hard/DynaSAC_Bounded_07_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_07_5/alg_config.json deleted file mode 100644 index e8c9014..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_07_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.7, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_reacher_hard/DynaSAC_Bounded_07_5/env_config.json b/env_reacher_hard/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index 7109cfa..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "reacher", - "task": "hard" -} diff --git a/env_reacher_hard/DynaSAC_Bounded_09_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_09_5/alg_config.json deleted file mode 100644 index 4923f37..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_09_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - 
"gripper": false, - "threshold": 0.9, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_reacher_hard/DynaSAC_Bounded_09_5/env_config.json b/env_reacher_hard/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index 7109cfa..0000000 --- a/env_reacher_hard/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "reacher", - "task": "hard" -} diff --git a/env_reacher_hard/STEVE/STEVESAC_3/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_10/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_10/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_10/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + 
"number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_2/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_2/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_2/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_5/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_5/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_01_5/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_10/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + 
"algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_10/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_10/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_2/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_2/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_2/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_5/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + 
"alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_5/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_03_5/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_10/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_10/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_10/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_2/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + 
"threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_2/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_2/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_5/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_5/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_05_5/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/alg_config.json new file mode 100644 index 0000000..7b58ad1 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna_SAC_1_10/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/env_config.json similarity index 100% rename from env_walker_walk/Dyna_SAC_1_10/env_config.json rename to 
env_walker_walk/Dyna/DynaSAC_Bounded_01_10/env_config.json diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/alg_config.json new file mode 100644 index 0000000..49ae876 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/env_config.json similarity index 100% rename from env_walker_config.json rename to env_walker_walk/Dyna/DynaSAC_Bounded_01_2/env_config.json diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/DynaSAC_Bounded_07_5/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/alg_config.json similarity index 96% rename from env_cheetah_run/DynaSAC_Bounded_07_5/alg_config.json rename to env_walker_walk/Dyna/DynaSAC_Bounded_01_5/alg_config.json index e8c9014..4133617 100644 --- a/env_cheetah_run/DynaSAC_Bounded_07_5/alg_config.json +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/alg_config.json @@ -28,6 +28,6 @@ "train_reward": true, "train_both": false, "gripper": false, - "threshold": 0.7, + "threshold": 0.1, "exploration_sample": 5 } \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/alg_config.json new file mode 100644 index 0000000..4d713e1 --- /dev/null +++ 
b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/alg_config.json new file mode 100644 index 0000000..66575fa --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/alg_config.json new file mode 100644 index 0000000..f534721 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + 
"max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/alg_config.json new file mode 100644 index 0000000..4889c3b --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/alg_config.json new file mode 100644 index 0000000..2f569f9 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 
1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/alg_config.json new file mode 100644 index 0000000..60b0894 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/Dyna_SAC_1_10/alg_config.json b/env_walker_walk/Dyna/Dyna_SAC_1_10/alg_config.json new file mode 100644 index 0000000..e71aa83 --- /dev/null +++ b/env_walker_walk/Dyna/Dyna_SAC_1_10/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "DynaSAC_NS", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/Dyna_SAC_1_10/env_config.json 
b/env_walker_walk/Dyna/Dyna_SAC_1_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/Dyna_SAC_1_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/Dyna_SAC_1_10/train_config.json b/env_walker_walk/Dyna/Dyna_SAC_1_10/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/Dyna_SAC_1_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/DynaSAC_Bounded_01_5/env_config.json b/env_walker_walk/DynaSAC_Bounded_01_5/env_config.json deleted file mode 100644 index d12d898..0000000 --- a/env_walker_walk/DynaSAC_Bounded_01_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "walker", - "task": "walk" -} diff --git a/env_walker_walk/DynaSAC_Bounded_03_5/env_config.json b/env_walker_walk/DynaSAC_Bounded_03_5/env_config.json deleted file mode 100644 index d12d898..0000000 --- a/env_walker_walk/DynaSAC_Bounded_03_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "walker", - "task": "walk" -} diff --git a/env_walker_walk/DynaSAC_Bounded_05_5/env_config.json b/env_walker_walk/DynaSAC_Bounded_05_5/env_config.json deleted file mode 100644 index d12d898..0000000 --- a/env_walker_walk/DynaSAC_Bounded_05_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "walker", - "task": "walk" -} diff --git a/env_walker_walk/DynaSAC_Bounded_07_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_07_5/alg_config.json deleted file mode 100644 index e8c9014..0000000 --- a/env_walker_walk/DynaSAC_Bounded_07_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - "num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.7, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_walker_walk/DynaSAC_Bounded_07_5/env_config.json b/env_walker_walk/DynaSAC_Bounded_07_5/env_config.json deleted file mode 100644 index d12d898..0000000 --- a/env_walker_walk/DynaSAC_Bounded_07_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "walker", - "task": "walk" -} diff --git a/env_walker_walk/DynaSAC_Bounded_09_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_09_5/alg_config.json deleted file mode 100644 index 4923f37..0000000 --- a/env_walker_walk/DynaSAC_Bounded_09_5/alg_config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "algorithm": "DynaSAC_Bounded", - "type": "mbrl", - "G": 1, - "G_model": 5.0, - "batch_size": 256, - "buffer_size": 1000000, - "max_steps_exploration": 256, - "max_steps_training": 1000000, - "number_steps_per_train_policy": 1, - - "reward_scale": 1.0, - "actor_lr": 3e-4, - "critic_lr": 3e-4, - "alpha_lr": 3e-4, - "gamma": 0.99, - "tau": 0.005, - - "min_noise": 0.0, - "noise_scale": 0.1, - "noise_decay": 1.0, - - "num_models": 6, - "world_model_lr": 0.001, - "horizon": 1, - 
"num_samples": 10, - "sas": false, - "train_reward": true, - "train_both": false, - "gripper": false, - "threshold": 0.9, - "exploration_sample": 5 -} \ No newline at end of file diff --git a/env_walker_walk/DynaSAC_Bounded_09_5/env_config.json b/env_walker_walk/DynaSAC_Bounded_09_5/env_config.json deleted file mode 100644 index d12d898..0000000 --- a/env_walker_walk/DynaSAC_Bounded_09_5/env_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "gym": "dmcs", - "domain": "walker", - "task": "walk" -} diff --git a/env_walker_walk/STEVE/STEVESAC_3/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3/alg_config.json new file mode 100644 index 0000000..d862a79 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "STEVESAC", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 3, + "num_rwd_models": 5, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3/env_config.json b/env_walker_walk/STEVE/STEVESAC_3/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3/train_config.json b/env_walker_walk/STEVE/STEVESAC_3/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_10/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_10/alg_config.json new file mode 100644 index 0000000..31deb18 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_10/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_10/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": 
[15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_2/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_2/alg_config.json new file mode 100644 index 0000000..f0addf2 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_2/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_2/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_2/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_5/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_5/alg_config.json new file mode 100644 index 0000000..4cda484 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.1, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_5/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_01_5/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_01_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_01_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_10/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_10/alg_config.json new file mode 100644 index 0000000..dcea6a2 --- /dev/null +++ 
b/env_walker_walk/STEVE/STEVESAC_3_03_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_10/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_10/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_2/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_2/alg_config.json new file mode 100644 index 0000000..10e8516 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_2/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_2/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_2/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_5/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_5/alg_config.json new file mode 100644 index 0000000..4ec6ba4 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + 
"reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.3, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_5/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_03_5/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_03_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_03_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_10/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_10/alg_config.json new file mode 100644 index 0000000..b7b4593 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_10/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 10 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_10/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_10/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_10/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_10/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_10/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_10/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_2/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_2/alg_config.json new file mode 100644 index 0000000..017e66b --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_2/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + 
"train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_2/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_2/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_2/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_2/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_2/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_2/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_5/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_5/alg_config.json new file mode 100644 index 0000000..045ce1f --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_5/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5, + "exploration_sample": 5 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_5/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_5/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_5/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_05_5/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_05_5/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_05_5/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/DynaSAC_Bounded/alg_config.json b/old_configs/DynaSAC_Bounded/alg_config.json similarity index 100% rename from DynaSAC_Bounded/alg_config.json rename to old_configs/DynaSAC_Bounded/alg_config.json diff --git a/old_configs/DynaSAC_Bounded/env_config.json b/old_configs/DynaSAC_Bounded/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/old_configs/DynaSAC_Bounded/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/DynaSAC_Bounded/train_config.json b/old_configs/DynaSAC_Bounded/train_config.json similarity index 100% rename from DynaSAC_Bounded/train_config.json rename to old_configs/DynaSAC_Bounded/train_config.json diff --git a/MBRL_dyna_binarybatchreweight_config.json b/old_configs/MBRL_dyna_binarybatchreweight_config.json similarity index 100% rename from MBRL_dyna_binarybatchreweight_config.json rename to old_configs/MBRL_dyna_binarybatchreweight_config.json diff --git a/MBRL_dyna_bivbatchreweight_config.json b/old_configs/MBRL_dyna_bivbatchreweight_config.json similarity index 100% rename from 
MBRL_dyna_bivbatchreweight_config.json rename to old_configs/MBRL_dyna_bivbatchreweight_config.json diff --git a/MBRL_dyna_config.json b/old_configs/MBRL_dyna_config.json similarity index 100% rename from MBRL_dyna_config.json rename to old_configs/MBRL_dyna_config.json diff --git a/MBRL_dyna_config_100.json b/old_configs/MBRL_dyna_config_100.json similarity index 100% rename from MBRL_dyna_config_100.json rename to old_configs/MBRL_dyna_config_100.json diff --git a/MBRL_dyna_config_50.json b/old_configs/MBRL_dyna_config_50.json similarity index 100% rename from MBRL_dyna_config_50.json rename to old_configs/MBRL_dyna_config_50.json diff --git a/MBRL_dyna_immerse_reweight_combo_config.json b/old_configs/MBRL_dyna_immerse_reweight_combo_config.json similarity index 100% rename from MBRL_dyna_immerse_reweight_combo_config.json rename to old_configs/MBRL_dyna_immerse_reweight_combo_config.json diff --git a/MBRL_dyna_normalizedsigmoidbatchreweight_config.json b/old_configs/MBRL_dyna_normalizedsigmoidbatchreweight_config.json similarity index 100% rename from MBRL_dyna_normalizedsigmoidbatchreweight_config.json rename to old_configs/MBRL_dyna_normalizedsigmoidbatchreweight_config.json diff --git a/MBRL_dyna_sa_config.json b/old_configs/MBRL_dyna_sa_config.json similarity index 100% rename from MBRL_dyna_sa_config.json rename to old_configs/MBRL_dyna_sa_config.json diff --git a/MBRL_dyna_sabr_config.json b/old_configs/MBRL_dyna_sabr_config.json similarity index 100% rename from MBRL_dyna_sabr_config.json rename to old_configs/MBRL_dyna_sabr_config.json diff --git a/MBRL_dyna_sas_config.json b/old_configs/MBRL_dyna_sas_config.json similarity index 100% rename from MBRL_dyna_sas_config.json rename to old_configs/MBRL_dyna_sas_config.json diff --git a/MBRL_dyna_sas_immersive_weight.json b/old_configs/MBRL_dyna_sas_immersive_weight.json similarity index 100% rename from MBRL_dyna_sas_immersive_weight.json rename to old_configs/MBRL_dyna_sas_immersive_weight.json diff --git a/MBRL_dyna_scalebatchreweight_config.json b/old_configs/MBRL_dyna_scalebatchreweight_config.json similarity index 100% rename from MBRL_dyna_scalebatchreweight_config.json rename to old_configs/MBRL_dyna_scalebatchreweight_config.json diff --git a/MBRL_dyna_sunrisebatchreweight_config.json b/old_configs/MBRL_dyna_sunrisebatchreweight_config.json similarity index 100% rename from MBRL_dyna_sunrisebatchreweight_config.json rename to old_configs/MBRL_dyna_sunrisebatchreweight_config.json diff --git a/MBRL_dyna_uwacbatchreweight_config.json b/old_configs/MBRL_dyna_uwacbatchreweight_config.json similarity index 100% rename from MBRL_dyna_uwacbatchreweight_config.json rename to old_configs/MBRL_dyna_uwacbatchreweight_config.json diff --git a/MBRL_steve_config.json b/old_configs/MBRL_steve_config.json similarity index 100% rename from MBRL_steve_config.json rename to old_configs/MBRL_steve_config.json diff --git a/MFRL_sac_config.json b/old_configs/MFRL_sac_config.json similarity index 100% rename from MFRL_sac_config.json rename to old_configs/MFRL_sac_config.json diff --git a/STEVESAC/alg_config.json b/old_configs/STEVESAC/alg_config.json similarity index 100% rename from STEVESAC/alg_config.json rename to old_configs/STEVESAC/alg_config.json diff --git a/old_configs/STEVESAC/env_config.json b/old_configs/STEVESAC/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/old_configs/STEVESAC/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git 
a/STEVESAC/train_config.json b/old_configs/STEVESAC/train_config.json similarity index 100% rename from STEVESAC/train_config.json rename to old_configs/STEVESAC/train_config.json diff --git a/STEVESAC_Bounded/alg_config.json b/old_configs/STEVESAC_Bounded/alg_config.json similarity index 100% rename from STEVESAC_Bounded/alg_config.json rename to old_configs/STEVESAC_Bounded/alg_config.json diff --git a/old_configs/STEVESAC_Bounded/env_config.json b/old_configs/STEVESAC_Bounded/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/old_configs/STEVESAC_Bounded/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/STEVESAC_Bounded/train_config.json b/old_configs/STEVESAC_Bounded/train_config.json similarity index 100% rename from STEVESAC_Bounded/train_config.json rename to old_configs/STEVESAC_Bounded/train_config.json diff --git a/old_configs/env_acrobot_config.json b/old_configs/env_acrobot_config.json new file mode 100644 index 0000000..ce60721 --- /dev/null +++ b/old_configs/env_acrobot_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swingup", + "domain": "acrobot" +} diff --git a/env_ball_in_cup_config.json b/old_configs/env_ball_in_cup_config.json similarity index 100% rename from env_ball_in_cup_config.json rename to old_configs/env_ball_in_cup_config.json diff --git a/old_configs/env_cartpole_config.json b/old_configs/env_cartpole_config.json new file mode 100644 index 0000000..14f64ee --- /dev/null +++ b/old_configs/env_cartpole_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swingup", + "domain": "cartpole" +} diff --git a/old_configs/env_cheetah_config.json b/old_configs/env_cheetah_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/old_configs/env_cheetah_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/old_configs/env_finger_turn_config.json b/old_configs/env_finger_turn_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/old_configs/env_finger_turn_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/old_configs/env_fish_config.json b/old_configs/env_fish_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/old_configs/env_fish_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/old_configs/env_hopper_config.json b/old_configs/env_hopper_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/old_configs/env_hopper_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/old_configs/env_humanoid_config.json b/old_configs/env_humanoid_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/old_configs/env_humanoid_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_manipulator_config.json b/old_configs/env_manipulator_config.json similarity index 100% rename from env_manipulator_config.json rename to old_configs/env_manipulator_config.json diff --git a/old_configs/env_reacher_config.json b/old_configs/env_reacher_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/old_configs/env_reacher_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/old_configs/env_walker_config.json b/old_configs/env_walker_config.json new file mode 100644 index 
0000000..3303c68 --- /dev/null +++ b/old_configs/env_walker_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/openai_cheetah_config.json b/old_configs/openai_cheetah_config.json similarity index 100% rename from openai_cheetah_config.json rename to old_configs/openai_cheetah_config.json diff --git a/old_configs/sample_Dyna_SAC/alg_config.json b/old_configs/sample_Dyna_SAC/alg_config.json new file mode 100644 index 0000000..e71aa83 --- /dev/null +++ b/old_configs/sample_Dyna_SAC/alg_config.json @@ -0,0 +1,32 @@ +{ + "algorithm": "DynaSAC_NS", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false +} \ No newline at end of file diff --git a/old_configs/sample_Dyna_SAC/env_config.json b/old_configs/sample_Dyna_SAC/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/old_configs/sample_Dyna_SAC/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/sample_Dyna_SAC/train_config.json b/old_configs/sample_Dyna_SAC/train_config.json similarity index 100% rename from sample_Dyna_SAC/train_config.json rename to old_configs/sample_Dyna_SAC/train_config.json diff --git a/sample_SAC/alg_config.json b/old_configs/sample_SAC/alg_config.json similarity index 100% rename from sample_SAC/alg_config.json rename to old_configs/sample_SAC/alg_config.json diff --git a/sample_SAC/env_config.json b/old_configs/sample_SAC/env_config.json similarity index 100% rename from sample_SAC/env_config.json rename to old_configs/sample_SAC/env_config.json diff --git a/sample_SAC/train_config.json b/old_configs/sample_SAC/train_config.json similarity index 100% rename from sample_SAC/train_config.json rename to old_configs/sample_SAC/train_config.json diff --git a/training_10_config.json b/old_configs/training_10_config.json similarity index 100% rename from training_10_config.json rename to old_configs/training_10_config.json diff --git a/training_15_config.json b/old_configs/training_15_config.json similarity index 100% rename from training_15_config.json rename to old_configs/training_15_config.json diff --git a/training_25_config.json b/old_configs/training_25_config.json similarity index 100% rename from training_25_config.json rename to old_configs/training_25_config.json diff --git a/training_35_config.json b/old_configs/training_35_config.json similarity index 100% rename from training_35_config.json rename to old_configs/training_35_config.json diff --git a/training_45_config.json b/old_configs/training_45_config.json similarity index 100% rename from training_45_config.json rename to old_configs/training_45_config.json diff --git a/training_55_config.json b/old_configs/training_55_config.json similarity index 100% rename from training_55_config.json rename to old_configs/training_55_config.json diff --git a/training_config.json b/old_configs/training_config.json similarity index 100% rename from training_config.json rename to old_configs/training_config.json
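
Note on the layout above: every run directory in this reorganisation carries the same trio of files (alg_config.json, env_config.json, train_config.json), and the directory suffixes appear to encode the two swept hyperparameters, so DynaSAC_Bounded_01_10 corresponds to "threshold": 0.1 with "exploration_sample": 10. Below is a minimal Python loader sketch under those assumptions; load_run_config is a hypothetical helper for illustration, not a function from this repository.

    import json
    from pathlib import Path


    def load_run_config(run_dir: str) -> dict:
        """Collect the three per-run config files into one dict."""
        root = Path(run_dir)
        config = {}
        for name in ("alg_config.json", "env_config.json", "train_config.json"):
            with open(root / name, "r", encoding="utf-8") as f:
                # keys become "alg", "env", "train" (removesuffix needs Python 3.9+)
                config[name.removesuffix("_config.json")] = json.load(f)
        return config


    if __name__ == "__main__":
        # e.g. the walker-walk DynaSAC_Bounded run added in this diff
        cfg = load_run_config("env_walker_walk/Dyna/DynaSAC_Bounded_01_10")
        assert cfg["alg"]["threshold"] == 0.1
        assert cfg["alg"]["exploration_sample"] == 10
        assert cfg["env"] == {"gym": "dmcs", "task": "walk", "domain": "walker"}
        print(cfg["train"]["seeds"])  # [15, 25, 35, 45, 55]

The same pattern covers the STEVESAC_Bounded grid above, whose suffixes sweep "threshold" over {0.1, 0.3, 0.5} and "exploration_sample" over {2, 5, 10}.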