From 10c76dca17826d614fc10d9c73228f33d14a4f48 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Sat, 29 Jun 2024 16:08:30 +0530
Subject: [PATCH 1/5] Add carflag-v0 env

Car Flag tasks a car with driving across a 1D line to the correct flag.
The car must first drive to the oracle flag and then to the correct
endpoint. The agent's observation is a vector of three floats: its
position on the line, its velocity at each timestep, and the goal flag's
location when it reaches the oracle flag. The agent's actions alter its
velocity: it can accelerate left, perform a no-op (maintain current
velocity), or accelerate right.
---
 popgym/envs/carflag.py | 144 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 popgym/envs/carflag.py

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
new file mode 100644
index 0000000..31ccac9
--- /dev/null
+++ b/popgym/envs/carflag.py
@@ -0,0 +1,144 @@
+"""Car Flag tasks a car with driving across a 1D line to the correct flag.
+
+The car must first drive to the oracle flag and then to the correct endpoint.
+The agent's observation is a vector of three floats: its position on the line,
+its velocity at each timestep, and the goal flag's location when it reaches
+the oracle flag. The agent's actions alter its velocity: it can accelerate left,
+perform a no-op (maintain current velocity), or accelerate right."""
+
+import gymnasium as gym
+import numpy as np
+
+from popgym.core.env import POPGymEnv
+
+
+class CarFlag(POPGymEnv):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag. 
+
+    The car must first drive to the oracle flag and then to the correct endpoint. 
+    The agent's observation is a vector of three floats: its position on the line, 
+    its velocity at each timestep, and the goal flag's location when it reaches 
+    the oracle flag. The agent's actions alter its velocity: it can accelerate left, 
+    perform a no-op (maintain current velocity), or accelerate right.
+
+    Args:
+        discrete: If True, use a discrete action space; if False, a continuous one.
+
+    Returns:
+        A gym environment
+    """
+    def __init__(self, discrete: bool):
+        self.max_position = 1.1
+        self.min_position = -self.max_position
+        self.max_speed = 0.07
+
+        self.min_action = -1.0
+        self.max_action = 1.0
+
+        self.heaven_position = 1.0
+        self.hell_position = -1.0
+        self.oracle_position = 0.5
+        self.power = 0.0015
+
+        self.low_state = np.array([self.min_position, -self.max_speed])
+        self.high_state = np.array([self.max_position, self.max_speed])
+
+        # When the cart is within this vicinity, it observes the direction given
+        # by the oracle
+        self.oracle_delta = 0.2
+
+        self.low_state = np.array(
+            [self.min_position, -self.max_speed, -1.0], dtype=np.float32
+        )
+        self.high_state = np.array(
+            [self.max_position, self.max_speed, 1.0], dtype=np.float32
+        )
+
+        self.discrete = discrete
+
+        if self.discrete:
+            self.action_space = gym.spaces.Discrete(3)
+        else:
+            self.action_space = gym.spaces.Box(
+                low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
+            )
+        self.observation_space = gym.spaces.Box(
+            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        )
+
+        self.np_random = None
+        self.state = None
+
+    def step(self, action):
+        position = self.state[0]
+        velocity = self.state[1]
+        if self.discrete:
+            # 0 is -1, 1 is 0, 2 is 1
+            force = action - 1
+        else:
+            force = np.clip(action, -1, 1)
+
+        velocity += force * self.power
+        velocity = min(velocity, self.max_speed)
+        velocity = max(velocity, -self.max_speed)
+        position += velocity
+        position = min(position, self.max_position)
+        position = max(position, self.min_position)
+        if position == self.min_position and velocity < 0:
+            velocity = 0
+
+        max_position = max(self.heaven_position, self.hell_position)
+        min_position = min(self.heaven_position, self.hell_position)
+
+        done = bool(position >= max_position or position <= min_position)
+
+        env_reward = 0
+
+        if self.heaven_position > self.hell_position:
+            if position >= self.heaven_position:
+                env_reward = 1.0
+
+            if position <= self.hell_position:
+                env_reward = -1.0
+
+        if self.heaven_position < self.hell_position:
+            if position <= self.heaven_position:
+                env_reward = 1.0
+
+            if position >= self.hell_position:
+                env_reward = -1.0
+
+        direction = 0.0
+        if (
+            position >= self.oracle_position - self.oracle_delta
+            and position <= self.oracle_position + self.oracle_delta
+        ):
+            if self.heaven_position > self.hell_position:
+                # Heaven on the right
+                direction = 1.0
+            else:
+                # Heaven on the left
+                direction = -1.0
+
+        self.state = np.array([position, velocity, direction])
+
+        return self.state, env_reward, done, {"is_success": env_reward > 0}
+
+    def reset(self):
+        # Randomize the heaven/hell location
+        if self.np_random.integers(low=0, high=2, size=1) == 0:
+            self.heaven_position = 1.0
+        else:
+            self.heaven_position = -1.0
+
+        self.hell_position = -self.heaven_position
+
+        self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
+        return np.array(self.state)
+
+    def get_state(self):
+        # Return the position of the car, oracle, and goal
+        return self.state, self.oracle_position, self.heaven_position, self.hell_position
+
+    def render(self):
+        return None

From 9a71557ec1ed36cfd1ca94f579d3b849472272ba Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Mon, 1 Jul 2024 11:12:13 +0530
Subject: [PATCH 2/5] Adds easy, medium and hard levels

The easy level has the range [-1, 1]. The medium level has the range
[-3, 3]. The hard level has the range [-5, 5].
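
A minimal usage sketch (illustrative only; it assumes the constructor
defaults and the four-tuple step() return introduced in the diff below):

    from popgym.envs.carflag import CarFlag

    env = CarFlag(discrete=True, difficulty="medium")  # flags at +/-3.0
    obs, info = env.reset(seed=0)
    # Discrete actions: 0 accelerates left, 1 is a no-op, 2 accelerates right.
    obs, reward, done, info = env.step(2)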
---
 popgym/envs/carflag.py | 80 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 10 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index 31ccac9..acad337 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -6,6 +6,8 @@
 the oracle flag. The agent's actions alter its velocity: it can accelerate left,
 perform a no-op (maintain current velocity), or accelerate right."""
 
+from typing import Any, Dict, Optional, Tuple
+
 import gymnasium as gym
 import numpy as np
 
@@ -13,12 +15,12 @@
 
 
 class CarFlag(POPGymEnv):
-    """Car Flag tasks a car with driving across a 1D line to the correct flag. 
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
 
-    The car must first drive to the oracle flag and then to the correct endpoint. 
-    The agent's observation is a vector of three floats: its position on the line, 
-    its velocity at each timestep, and the goal flag's location when it reaches 
-    the oracle flag. The agent's actions alter its velocity: it can accelerate left, 
+    The car must first drive to the oracle flag and then to the correct endpoint.
+    The agent's observation is a vector of three floats: its position on the line,
+    its velocity at each timestep, and the goal flag's location when it reaches
+    the oracle flag. The agent's actions alter its velocity: it can accelerate left,
     perform a no-op (maintain current velocity), or accelerate right.
 
     Args:
@@ -27,8 +29,21 @@ class CarFlag(POPGymEnv):
 
     Returns:
         A gym environment
     """
-    def __init__(self, discrete: bool):
-        self.max_position = 1.1
+
+    def __init__(self, discrete=True, difficulty="easy"):
+        assert difficulty in ["easy", "medium", "hard"]
+        if difficulty == "easy":
+            self.heaven_position = 1.0
+            self.hell_position = -1.0
+        elif difficulty == "medium":
+            self.heaven_position = 3.0
+            self.hell_position = -3.0
+        elif difficulty == "hard":
+            self.heaven_position = 5.0
+            self.hell_position = -5.0
+        else:
+            raise NotImplementedError(f"Invalid difficulty {difficulty}")
+        self.max_position = self.heaven_position + 0.1
         self.min_position = -self.max_position
         self.max_speed = 0.07
 
@@ -124,7 +139,13 @@ def step(self, action):
 
         return self.state, env_reward, done, {"is_success": env_reward > 0}
 
-    def reset(self):
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ) -> Tuple[gym.core.ObsType, Dict[str, Any]]:
+        super().reset(seed=seed)
         # Randomize the heaven/hell location
         if self.np_random.integers(low=0, high=2, size=1) == 0:
             self.heaven_position = 1.0
@@ -134,11 +155,50 @@ def reset(self):
         self.hell_position = -self.heaven_position
 
         self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
-        return np.array(self.state)
+        return np.array(self.state), {}
 
     def get_state(self):
         # Return the position of the car, oracle, and goal
-        return self.state, self.oracle_position, self.heaven_position, self.hell_position
+        return (
+            self.state,
+            self.oracle_position,
+            self.heaven_position,
+            self.hell_position,
+        )
 
     def render(self):
         return None
+
+
+if __name__ == "__main__":
+    e = CarFlag()
+    obs, info = e.reset()
+    done = False
+    while not done:
+        action = int(input("Enter action: "))
+        obs, reward, done, info = e.step(action)
+        print(f"reward = {reward}")
+
+
+class CarFlagEasy(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The easy level has the range [-1, 1]."""
+
+    def __init__(self):
+        super().__init__(difficulty="easy")
+
+
+class CarFlagMedium(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The medium level has the range [-3, 3]."""
+
+    def __init__(self):
+        super().__init__(difficulty="medium")
+
+
+class CarFlagHard(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The hard level has the range [-5, 5]."""
+
+    def __init__(self):
+        super().__init__(difficulty="hard")

From 94f618094887ea0f29088af47ef8c2f2b0196c00 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Mon, 1 Jul 2024 11:17:31 +0530
Subject: [PATCH 3/5] Adds the env to the list of envs

---
 popgym/envs/__init__.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/popgym/envs/__init__.py b/popgym/envs/__init__.py
index 4df6d00..9a428d7 100644
--- a/popgym/envs/__init__.py
+++ b/popgym/envs/__init__.py
@@ -19,6 +19,12 @@
     BattleshipHard,
     BattleshipMedium,
 )
+from popgym.envs.carflag import (
+    CarFlag,
+    CarFlagEasy,
+    CarFlagHard,
+    CarFlagMedium,
+)
 from popgym.envs.concentration import (
     Concentration,
     ConcentrationEasy,
@@ -225,6 +231,7 @@
     Battleship: {"id": "popgym-Battleship-v0"},
     Concentration: {"id": "popgym-Concentration-v0"},
     MineSweeper: {"id": "popgym-MineSweeper-v0"},
+    CarFlag: {"id": "popgym-CarFlag-v0"},
 }
 
 GAME_EASY: Dict[gym.Env, Dict[str, Any]] = {
@@ -232,6 +239,7 @@
     BattleshipEasy: {"id": "popgym-BattleshipEasy-v0"},
     ConcentrationEasy: {"id": "popgym-ConcentrationEasy-v0"},
     MineSweeperEasy: {"id": "popgym-MineSweeperEasy-v0"},
+    CarFlagEasy: {"id": "popgym-CarFlagEasy-v0"},
 }
 
 GAME_MEDIUM: Dict[gym.Env, Dict[str, Any]] = {
@@ -239,6 +247,7 @@
     BattleshipMedium: {"id": "popgym-BattleshipMedium-v0"},
     ConcentrationMedium: {"id": "popgym-ConcentrationMedium-v0"},
     MineSweeperMedium: {"id": "popgym-MineSweeperMedium-v0"},
+    CarFlagMedium: {"id": "popgym-CarFlagMedium-v0"},
 }
 
 GAME_HARD: Dict[gym.Env, Dict[str, Any]] = {
@@ -246,6 +255,7 @@
     BattleshipHard: {"id": "popgym-BattleshipHard-v0"},
     ConcentrationHard: {"id": "popgym-ConcentrationHard-v0"},
     MineSweeperHard: {"id": "popgym-MineSweeperHard-v0"},
+    CarFlagHard: {"id": "popgym-CarFlagHard-v0"},
 }
 
 ALL_GAME = {**GAME_EASY, **GAME_MEDIUM, **GAME_HARD}

From c68b7cff969c394d0dc43f54924849e700ac7889 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Sun, 25 Aug 2024 22:28:34 +0530
Subject: [PATCH 4/5] replace done with terminated, truncated

---
 popgym/envs/carflag.py | 62 ++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index acad337..9e110ed 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -31,16 +31,20 @@ class CarFlag(POPGymEnv):
     """
 
     def __init__(self, discrete=True, difficulty="easy"):
-        assert difficulty in ["easy", "medium", "hard"]
-        if difficulty == "easy":
+        self.difficulty = difficulty
+        assert self.difficulty in ["easy", "medium", "hard"]
+        if self.difficulty == "easy":
             self.heaven_position = 1.0
             self.hell_position = -1.0
-        elif difficulty == "medium":
+            self.max_steps = 200
+        elif self.difficulty == "medium":
             self.heaven_position = 3.0
             self.hell_position = -3.0
-        elif difficulty == "hard":
+            self.max_steps = 300
+        elif self.difficulty == "hard":
             self.heaven_position = 5.0
             self.hell_position = -5.0
+            self.max_steps = 400
         else:
             raise NotImplementedError(f"Invalid difficulty {difficulty}")
         self.max_position = self.heaven_position + 0.1
@@ -50,14 +54,9 @@ def __init__(self, discrete=True, difficulty="easy"):
 
         self.min_action = -1.0
         self.max_action = 1.0
 
-        self.heaven_position = 1.0
-        self.hell_position = -1.0
         self.oracle_position = 0.5
         self.power = 0.0015
 
-        self.low_state = np.array([self.min_position, -self.max_speed])
-        self.high_state = np.array([self.max_position, self.max_speed])
-
         # When the cart is within this vicinity, it observes the direction given
         # by the oracle
         self.oracle_delta = 0.2
@@ -81,10 +80,18 @@ def __init__(self, discrete=True, difficulty="easy"):
             low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
         )
 
+        self.state_space = gym.spaces.Box(
+            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        )
+
         self.np_random = None
         self.state = None
 
+        self.current_step = 0
+
     def step(self, action):
+        self.current_step += 1
+
         position = self.state[0]
         velocity = self.state[1]
         if self.discrete:
@@ -105,7 +112,8 @@ def step(self, action):
         max_position = max(self.heaven_position, self.hell_position)
         min_position = min(self.heaven_position, self.hell_position)
 
-        done = bool(position >= max_position or position <= min_position)
+        terminated = bool(position >= max_position or position <= min_position)
+        truncated = bool(self.current_step >= self.max_steps)
 
         env_reward = 0
 
@@ -135,9 +143,19 @@ def step(self, action):
             # Heaven on the left
             direction = -1.0
 
+        position = np.clip(position, self.min_position, self.max_position)
+        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
+        direction = np.clip(direction, -1.0, 1.0)
+
         self.state = np.array([position, velocity, direction])
 
-        return self.state, env_reward, done, {"is_success": env_reward > 0}
+        return (
+            self.state,
+            env_reward,
+            terminated,
+            truncated,
+            {"is_success": env_reward > 0},
+        )
 
     def reset(
         self,
@@ -148,13 +166,29 @@ def reset(
         super().reset(seed=seed)
         # Randomize the heaven/hell location
        if self.np_random.integers(low=0, high=2, size=1) == 0:
-            self.heaven_position = 1.0
+            if self.difficulty == "easy":
+                self.heaven_position = 1.0
+            elif self.difficulty == "medium":
+                self.heaven_position = 3.0
+            elif self.difficulty == "hard":
+                self.heaven_position = 5.0
         else:
-            self.heaven_position = -1.0
+            if self.difficulty == "easy":
+                self.heaven_position = -1.0
+            elif self.difficulty == "medium":
+                self.heaven_position = -3.0
+            elif self.difficulty == "hard":
+                self.heaven_position = -5.0
 
         self.hell_position = -self.heaven_position
 
-        self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
+        position = self.np_random.uniform(low=self.min_position, high=self.max_position)
+        velocity = 0.0
+        direction = 0.0
+
+        self.state = np.array([position, velocity, direction], dtype=np.float32)
+        self.current_step = 0  # Reset step counter
+
         return np.array(self.state), {}

From a537918dc007a5ef96c42effa49cb54a68e68cc1 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Tue, 27 Aug 2024 22:50:54 +0530
Subject: [PATCH 5/5] fix the state_space dim error

---
 popgym/envs/carflag.py | 35 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index 9e110ed..52cee2d 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -80,10 +80,18 @@ def __init__(self, discrete=True, difficulty="easy"):
             low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
         )
 
-        self.state_space = gym.spaces.Box(
-            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        # Define the lower and upper bounds for the state space
+        low = np.array(
+            [self.min_position, -self.max_speed, -1.0, self.oracle_position, -5, -5],
+            dtype=np.float32,
+        )
+        high = np.array(
+            [self.max_position, self.max_speed, 1.0, self.oracle_position, 5, 5],
+            dtype=np.float32,
         )
 
+        self.state_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
+
         self.np_random = None
         self.state = None
 
@@ -143,11 +151,7 @@ def step(self, action):
             # Heaven on the left
             direction = -1.0
 
-        position = np.clip(position, self.min_position, self.max_position)
-        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
-        direction = np.clip(direction, -1.0, 1.0)
-
-        self.state = np.array([position, velocity, direction])
+        self.state = np.array([position, velocity, direction], dtype=np.float32)
 
         return (
             self.state,
@@ -189,15 +193,20 @@ def reset(
 
         self.state = np.array([position, velocity, direction], dtype=np.float32)
         self.current_step = 0  # Reset step counter
 
-        return np.array(self.state), {}
+        return self.state, {}
 
     def get_state(self):
         # Return the position of the car, oracle, and goal
-        return (
-            self.state,
-            self.oracle_position,
-            self.heaven_position,
-            self.hell_position,
+        return np.array(
+            [
+                self.state[0],
+                self.state[1],
+                self.state[2],
+                self.oracle_position,
+                self.heaven_position,
+                self.hell_position,
+            ],
+            dtype=np.float32,
        )
 
     def render(self):
         return None
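
With the full series applied, the new environment should be reachable
through the IDs registered in PATCH 3/5. A minimal end-to-end sketch
(assuming that importing popgym registers the IDs listed in its
registration dicts, as it does for the existing environments):

    import gymnasium as gym

    import popgym  # noqa: F401  # importing popgym registers the env IDs

    env = gym.make("popgym-CarFlagEasy-v0")
    obs, info = env.reset(seed=0)
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
    print("is_success:", info["is_success"])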