naming convention

qiaoting159753 · Dec 21, 2024 · 51e1fb3
1 parent b2c4082
commit 51e1fb3
Show file tree

Hide file tree

Showing 31 changed files with 98 additions and 57 deletions.
diff --git a/DynaSAC_Bounded/alg_config.json b/DynaSAC_Bounded/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/Dyna_SAC/alg_config.json b/Dyna_SAC/alg_config.json
@@ -24,8 +24,8 @@
     "world_model_lr": 0.001,
     "horizon": 1,
     "num_samples": 10,
-    "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true
+    "sas": false,
+    "train_both": false,
+    "gripper": false
 }
diff --git a/Dyna_SAC/env_config.json b/Dyna_SAC/env_config.json
@@ -1,5 +1,5 @@
 {
     "gym": "dmcs",
-    "task": "catch",
-    "domain": "ball_in_cup"
+    "task": "run",
+    "domain": "cheetah"
 }
diff --git a/Dyna_SAC_cheetah/alg_config.json b/Dyna_SAC_cheetah/alg_config.json
@@ -0,0 +1,31 @@
+{
+    "algorithm": "DynaSAC_NS",
+    "type": "mbrl",
+    "G": 1,
+    "G_model": 5.0,
+    "batch_size": 256,
+    "buffer_size": 1000000,
+    "max_steps_exploration": 256,
+    "max_steps_training": 1000000,
+    "number_steps_per_train_policy": 1,
+
+    "reward_scale": 1.0,
+    "actor_lr": 3e-4,
+    "critic_lr": 3e-4,
+    "alpha_lr": 3e-4,
+    "gamma": 0.99,
+    "tau": 0.005,
+
+    "min_noise": 0.0,
+    "noise_scale": 0.1,
+    "noise_decay": 1.0,
+
+    "num_models": 6,
+    "world_model_lr": 0.001,
+    "horizon": 1,
+    "num_samples": 10,
+    "sas": false,
+    "train_reward": true,
+    "train_both": false,
+    "gripper": false
+}
diff --git a/Dyna_SAC_cheetah/env_config.json b/Dyna_SAC_cheetah/env_config.json
@@ -0,0 +1,5 @@
+{
+    "gym": "dmcs",
+    "task": "run",
+    "domain": "cheetah"
+}
diff --git a/Dyna_SAC_cheetah/train_config.json b/Dyna_SAC_cheetah/train_config.json
@@ -0,0 +1,5 @@
+{
+    "seeds": [10],
+    "number_steps_per_evaluation": 10000,
+    "number_eval_episodes": 10
+}
diff --git a/env_cheetah_run/DynaSAC_Bounded_01_5/alg_config.json b/env_cheetah_run/DynaSAC_Bounded_01_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/env_cheetah_run/DynaSAC_Bounded_03_5/alg_config.json b/env_cheetah_run/DynaSAC_Bounded_03_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.3,
     "exploration_sample": 5
 }
diff --git a/env_cheetah_run/DynaSAC_Bounded_05_5/alg_config.json b/env_cheetah_run/DynaSAC_Bounded_05_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.5,
     "exploration_sample": 5
 }
diff --git a/env_cheetah_run/DynaSAC_Bounded_07_5/alg_config.json b/env_cheetah_run/DynaSAC_Bounded_07_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.7,
     "exploration_sample": 5
 }
diff --git a/env_cheetah_run/DynaSAC_Bounded_09_5/alg_config.json b/env_cheetah_run/DynaSAC_Bounded_09_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.9,
     "exploration_sample": 5
 }
diff --git a/env_hopper_hop/DynaSAC_Bounded_01_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_01_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/env_hopper_hop/DynaSAC_Bounded_03_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_03_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.3,
     "exploration_sample": 5
 }
diff --git a/env_hopper_hop/DynaSAC_Bounded_05_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_05_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.5,
     "exploration_sample": 5
 }
diff --git a/env_hopper_hop/DynaSAC_Bounded_07_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_07_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.7,
     "exploration_sample": 5
 }
diff --git a/env_hopper_hop/DynaSAC_Bounded_09_5/alg_config.json b/env_hopper_hop/DynaSAC_Bounded_09_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.9,
     "exploration_sample": 5
 }
diff --git a/env_humanoid_run/DynaSAC_Bounded_01_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_01_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/env_humanoid_run/DynaSAC_Bounded_03_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_03_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.3,
     "exploration_sample": 5
 }
diff --git a/env_humanoid_run/DynaSAC_Bounded_05_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_05_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.5,
     "exploration_sample": 5
 }
diff --git a/env_humanoid_run/DynaSAC_Bounded_07_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_07_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.7,
     "exploration_sample": 5
 }
diff --git a/env_humanoid_run/DynaSAC_Bounded_09_5/alg_config.json b/env_humanoid_run/DynaSAC_Bounded_09_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.9,
     "exploration_sample": 5
 }
diff --git a/env_reacher_hard/DynaSAC_Bounded_01_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_01_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/env_reacher_hard/DynaSAC_Bounded_03_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_03_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.3,
     "exploration_sample": 5
 }
diff --git a/env_reacher_hard/DynaSAC_Bounded_05_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_05_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.5,
     "exploration_sample": 5
 }
diff --git a/env_reacher_hard/DynaSAC_Bounded_07_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_07_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.7,
     "exploration_sample": 5
 }
diff --git a/env_reacher_hard/DynaSAC_Bounded_09_5/alg_config.json b/env_reacher_hard/DynaSAC_Bounded_09_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.9,
     "exploration_sample": 5
 }
diff --git a/env_walker_walk/DynaSAC_Bounded_01_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_01_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.1,
     "exploration_sample": 5
 }
diff --git a/env_walker_walk/DynaSAC_Bounded_03_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_03_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.3,
     "exploration_sample": 5
 }
diff --git a/env_walker_walk/DynaSAC_Bounded_05_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_05_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.5,
     "exploration_sample": 5
 }
diff --git a/env_walker_walk/DynaSAC_Bounded_07_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_07_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.7,
     "exploration_sample": 5
 }
diff --git a/env_walker_walk/DynaSAC_Bounded_09_5/alg_config.json b/env_walker_walk/DynaSAC_Bounded_09_5/alg_config.json
@@ -26,8 +26,8 @@
     "num_samples": 10,
     "sas": false,
     "train_reward": true,
-    "train_both": true,
-    "gripper": true,
+    "train_both": false,
+    "gripper": false,
     "threshold": 0.9,
     "exploration_sample": 5
 }