diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..ce60721 --- /dev/null +++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swingup", + "domain": "acrobot" +} diff --git a/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_acrobot_swingup/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/alg_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/env_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..ce60721 --- /dev/null +++ b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swingup", + "domain": "acrobot" +} diff --git a/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/train_config.json b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_acrobot_swingup/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_cheetah_run/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_Yao/alg_config.json b/env_cheetah_run/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_cheetah_run/STEVE/STEVESAC_3_Yao/env_config.json b/env_cheetah_run/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..8cc0414 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "cheetah" +} diff --git a/env_cheetah_run/STEVE/STEVESAC_3_Yao/train_config.json b/env_cheetah_run/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_cheetah_run/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_finger_turn_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/alg_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/env_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..619b54b --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "turn_hard", + "domain": "finger" +} diff --git a/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/train_config.json b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_finger_turn_hard/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_fish_swim/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_Yao/alg_config.json b/env_fish_swim/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_fish_swim/STEVE/STEVESAC_3_Yao/env_config.json b/env_fish_swim/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..9e6a7f9 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "swim", + "domain": "fish" +} diff --git a/env_fish_swim/STEVE/STEVESAC_3_Yao/train_config.json b/env_fish_swim/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_fish_swim/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_hopper_hop/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_Yao/alg_config.json b/env_hopper_hop/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_hopper_hop/STEVE/STEVESAC_3_Yao/env_config.json b/env_hopper_hop/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..d1c47ad --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hop", + "domain": "hopper" +} diff --git a/env_hopper_hop/STEVE/STEVESAC_3_Yao/train_config.json b/env_hopper_hop/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_hopper_hop/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..9b38eaf --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_humanoid_run/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_Yao/alg_config.json b/env_humanoid_run/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..1e79453 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_humanoid_run/STEVE/STEVESAC_3_Yao/env_config.json b/env_humanoid_run/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..ef57631 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "run", + "domain": "humanoid" +} diff --git a/env_humanoid_run/STEVE/STEVESAC_3_Yao/train_config.json b/env_humanoid_run/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_humanoid_run/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..e8a2b30 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 400000, + "max_steps_exploration": 256, + "max_steps_training": 400000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_reacher_hard/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_Yao/alg_config.json b/env_reacher_hard/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..ec00e52 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 400000, + "max_steps_exploration": 256, + "max_steps_training": 400000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_reacher_hard/STEVE/STEVESAC_3_Yao/env_config.json b/env_reacher_hard/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..64cc733 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "hard", + "domain": "reacher" +} diff --git a/env_reacher_hard/STEVE/STEVESAC_3_Yao/train_config.json b/env_reacher_hard/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_reacher_hard/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/alg_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/alg_config.json new file mode 100644 index 0000000..e8a2b30 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 400000, + "max_steps_exploration": 256, + "max_steps_training": 400000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/env_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/train_config.json b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/env_walker_walk/Dyna/DynaSAC_Bounded_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_Yao/alg_config.json b/env_walker_walk/STEVE/STEVESAC_3_Yao/alg_config.json new file mode 100644 index 0000000..ec00e52 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_Yao/alg_config.json @@ -0,0 +1,34 @@ +{ + "algorithm": "STEVESAC_Bounded_Yao", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 400000, + "max_steps_exploration": 256, + "max_steps_training": 400000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "num_rwd_models": 5, + + "world_model_lr": 0.001, + "horizon": 3, + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 1.0, + "exploration_sample": 2 +} \ No newline at end of file diff --git a/env_walker_walk/STEVE/STEVESAC_3_Yao/env_config.json b/env_walker_walk/STEVE/STEVESAC_3_Yao/env_config.json new file mode 100644 index 0000000..3303c68 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_Yao/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "walk", + "domain": "walker" +} diff --git a/env_walker_walk/STEVE/STEVESAC_3_Yao/train_config.json b/env_walker_walk/STEVE/STEVESAC_3_Yao/train_config.json new file mode 100644 index 0000000..07ba116 --- /dev/null +++ b/env_walker_walk/STEVE/STEVESAC_3_Yao/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15,25,35,45,55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +} diff --git a/sample_iw/alg_config.json b/sample_iw/alg_config.json new file mode 100644 index 0000000..d1c50aa --- /dev/null +++ b/sample_iw/alg_config.json @@ -0,0 +1,33 @@ +{ + "algorithm": "DynaSAC_NS_IW", + "type": "mbrl", + "G": 1, + "G_model": 5.0, + "batch_size": 256, + "buffer_size": 1000000, + "max_steps_exploration": 256, + "max_steps_training": 1000000, + "number_steps_per_train_policy": 1, + + "reward_scale": 1.0, + "actor_lr": 3e-4, + "critic_lr": 3e-4, + "alpha_lr": 3e-4, + "gamma": 0.99, + "tau": 0.005, + + "min_noise": 0.0, + "noise_scale": 0.1, + "noise_decay": 1.0, + + "num_models": 6, + "world_model_lr": 0.001, + "horizon": 1, + "num_samples": 10, + + "sas": false, + "train_reward": true, + "train_both": false, + "gripper": false, + "threshold": 0.5 +} \ No newline at end of file diff --git a/sample_iw/env_config.json b/sample_iw/env_config.json new file mode 100644 index 0000000..4d0e5d8 --- /dev/null +++ b/sample_iw/env_config.json @@ -0,0 +1,5 @@ +{ + "gym": "dmcs", + "task": "catch", + "domain": "ball_in_cup" +} diff --git a/sample_iw/train_config.json b/sample_iw/train_config.json new file mode 100644 index 0000000..b1a5fa8 --- /dev/null +++ b/sample_iw/train_config.json @@ -0,0 +1,5 @@ +{ + "seeds": [15, 25, 35, 45, 55], + "number_steps_per_evaluation": 10000, + "number_eval_episodes": 10 +}