From 10c76dca17826d614fc10d9c73228f33d14a4f48 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Sat, 29 Jun 2024 16:08:30 +0530
Subject: [PATCH 1/5] Add carflag-v0 env

Car Flag tasks a car with driving across a 1D line to the correct flag.
The car must first drive to the oracle flag and then to the correct
endpoint. The agent's observation is a vector of three floats: its
position on the line, its velocity at each timestep, and the goal flag's
location when it reaches the oracle flag. The agent's actions alter its
velocity: it can accelerate left, perform a no-op (maintain current
velocity), or accelerate right.
---
 popgym/envs/carflag.py | 144 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 popgym/envs/carflag.py

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
new file mode 100644
index 0000000..31ccac9
--- /dev/null
+++ b/popgym/envs/carflag.py
@@ -0,0 +1,144 @@
+"""Car Flag tasks a car with driving across a 1D line to the correct flag.
+
+The car must first drive to the oracle flag and then to the correct endpoint.
+The agent's observation is a vector of three floats: its position on the line,
+its velocity at each timestep, and the goal flag's location when it reaches
+the oracle flag. The agent's actions alter its velocity: it can accelerate left,
+perform a no-op (maintain current velocity), or accelerate right."""
+
+import gymnasium as gym
+import numpy as np
+
+from popgym.core.env import POPGymEnv
+
+
+class CarFlag(POPGymEnv):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag. 
+
+    The car must first drive to the oracle flag and then to the correct endpoint. 
+    The agent's observation is a vector of three floats: its position on the line, 
+    its velocity at each timestep, and the goal flag's location when it reaches 
+    the oracle flag. The agent's actions alter its velocity: it can accelerate left, 
+    perform a no-op (maintain current velocity), or accelerate right.
+
+    Args:
+        discrete: If True, use a discrete action space; if False, a continuous one.
+
+    Returns:
+        A gym environment
+    """
+    def __init__(self, discrete: bool):
+        self.max_position = 1.1
+        self.min_position = -self.max_position
+        self.max_speed = 0.07
+
+        self.min_action = -1.0
+        self.max_action = 1.0
+
+        self.heaven_position = 1.0
+        self.hell_position = -1.0
+        self.oracle_position = 0.5
+        self.power = 0.0015
+
+        self.low_state = np.array([self.min_position, -self.max_speed])
+        self.high_state = np.array([self.max_position, self.max_speed])
+
+        # When the cart is within this vicinity, it observes the direction given
+        # by the oracle
+        self.oracle_delta = 0.2
+
+        self.low_state = np.array(
+            [self.min_position, -self.max_speed, -1.0], dtype=np.float32
+        )
+        self.high_state = np.array(
+            [self.max_position, self.max_speed, 1.0], dtype=np.float32
+        )
+
+        self.discrete = discrete
+
+        if self.discrete:
+            self.action_space = gym.spaces.Discrete(3)
+        else:
+            self.action_space = gym.spaces.Box(
+                low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
+            )
+        self.observation_space = gym.spaces.Box(
+            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        )
+
+        self.np_random = None
+        self.state = None
+
+    def step(self, action):
+        position = self.state[0]
+        velocity = self.state[1]
+        if self.discrete:
+            # 0 is -1, 1 is 0, 2 is 1
+            force = action - 1
+        else:
+            force = np.clip(action, -1, 1)
+
+        velocity += force * self.power
+        velocity = min(velocity, self.max_speed)
+        velocity = max(velocity, -self.max_speed)
+        position += velocity
+        position = min(position, self.max_position)
+        position = max(position, self.min_position)
+        if position == self.min_position and velocity < 0:
+            velocity = 0
+
+        max_position = max(self.heaven_position, self.hell_position)
+        min_position = min(self.heaven_position, self.hell_position)
+
+        done = bool(position >= max_position or position <= min_position)
+
+        env_reward = 0
+
+        if self.heaven_position > self.hell_position:
+            if position >= self.heaven_position:
+                env_reward = 1.0
+
+            if position <= self.hell_position:
+                env_reward = -1.0
+
+        if self.heaven_position < self.hell_position:
+            if position <= self.heaven_position:
+                env_reward = 1.0
+
+            if position >= self.hell_position:
+                env_reward = -1.0
+
+        direction = 0.0
+        if (
+            position >= self.oracle_position - self.oracle_delta
+            and position <= self.oracle_position + self.oracle_delta
+        ):
+            if self.heaven_position > self.hell_position:
+                # Heaven on the right
+                direction = 1.0
+            else:
+                # Heaven on the left
+                direction = -1.0
+
+        self.state = np.array([position, velocity, direction])
+
+        return self.state, env_reward, done, {"is_success": env_reward > 0}
+
+    def reset(self):
+        # Randomize the heaven/hell location
+        if self.np_random.integers(low=0, high=2, size=1) == 0:
+            self.heaven_position = 1.0
+        else:
+            self.heaven_position = -1.0
+
+        self.hell_position = -self.heaven_position
+
+        self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
+        return np.array(self.state)
+
+    def get_state(self):
+        # Return the position of the car, oracle, and goal
+        return self.state, self.oracle_position, self.heaven_position, self.hell_position
+
+    def render(self):
+        return None

From 9a71557ec1ed36cfd1ca94f579d3b849472272ba Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Mon, 1 Jul 2024 11:12:13 +0530
Subject: [PATCH 2/5] Adds easy, medium and hard levels

The easy level has the range [-1, 1]. The medium level has the range
[-3, 3]. The hard level has the range [-5, 5].
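
A minimal usage sketch (illustrative only; it assumes the constructor
defaults and the four-tuple step() return introduced in the diff below):

    from popgym.envs.carflag import CarFlag

    env = CarFlag(discrete=True, difficulty="medium")  # flags at +/-3.0
    obs, info = env.reset(seed=0)
    # Discrete actions: 0 accelerates left, 1 is a no-op, 2 accelerates right.
    obs, reward, done, info = env.step(2)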
---
 popgym/envs/carflag.py | 80 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 10 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index 31ccac9..acad337 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -6,6 +6,8 @@
 the oracle flag. The agent's actions alter its velocity: it can accelerate left,
 perform a no-op (maintain current velocity), or accelerate right."""
 
+from typing import Any, Dict, Optional, Tuple
+
 import gymnasium as gym
 import numpy as np
 
@@ -13,12 +15,12 @@
 
 
 class CarFlag(POPGymEnv):
-    """Car Flag tasks a car with driving across a 1D line to the correct flag. 
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
 
-    The car must first drive to the oracle flag and then to the correct endpoint. 
-    The agent's observation is a vector of three floats: its position on the line, 
-    its velocity at each timestep, and the goal flag's location when it reaches 
-    the oracle flag. The agent's actions alter its velocity: it can accelerate left, 
+    The car must first drive to the oracle flag and then to the correct endpoint.
+    The agent's observation is a vector of three floats: its position on the line,
+    its velocity at each timestep, and the goal flag's location when it reaches
+    the oracle flag. The agent's actions alter its velocity: it can accelerate left,
     perform a no-op (maintain current velocity), or accelerate right.
 
     Args:
@@ -27,8 +29,21 @@ class CarFlag(POPGymEnv):
 
     Returns:
         A gym environment
     """
-    def __init__(self, discrete: bool):
-        self.max_position = 1.1
+
+    def __init__(self, discrete=True, difficulty="easy"):
+        assert difficulty in ["easy", "medium", "hard"]
+        if difficulty == "easy":
+            self.heaven_position = 1.0
+            self.hell_position = -1.0
+        elif difficulty == "medium":
+            self.heaven_position = 3.0
+            self.hell_position = -3.0
+        elif difficulty == "hard":
+            self.heaven_position = 5.0
+            self.hell_position = -5.0
+        else:
+            raise NotImplementedError(f"Invalid difficulty {difficulty}")
+        self.max_position = self.heaven_position + 0.1
         self.min_position = -self.max_position
         self.max_speed = 0.07
 
@@ -124,7 +139,13 @@ def step(self, action):
 
         return self.state, env_reward, done, {"is_success": env_reward > 0}
 
-    def reset(self):
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ) -> Tuple[gym.core.ObsType, Dict[str, Any]]:
+        super().reset(seed=seed)
         # Randomize the heaven/hell location
         if self.np_random.integers(low=0, high=2, size=1) == 0:
             self.heaven_position = 1.0
@@ -134,11 +155,50 @@ def reset(self):
         self.hell_position = -self.heaven_position
 
         self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
-        return np.array(self.state)
+        return np.array(self.state), {}
 
     def get_state(self):
         # Return the position of the car, oracle, and goal
-        return self.state, self.oracle_position, self.heaven_position, self.hell_position
+        return (
+            self.state,
+            self.oracle_position,
+            self.heaven_position,
+            self.hell_position,
+        )
 
     def render(self):
         return None
+
+
+if __name__ == "__main__":
+    e = CarFlag()
+    obs, info = e.reset()
+    done = False
+    while not done:
+        action = int(input("Enter action: "))
+        obs, reward, done, info = e.step(action)
+        print(f"reward = {reward}")
+
+
+class CarFlagEasy(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The easy level has the range [-1, 1]."""
+
+    def __init__(self):
+        super().__init__(difficulty="easy")
+
+
+class CarFlagMedium(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The medium level has the range [-3, 3]."""
+
+    def __init__(self):
+        super().__init__(difficulty="medium")
+
+
+class CarFlagHard(CarFlag):
+    """Car Flag tasks a car with driving across a 1D line to the correct flag.
+    The hard level has the range [-5, 5]."""
+
+    def __init__(self):
+        super().__init__(difficulty="hard")

From 94f618094887ea0f29088af47ef8c2f2b0196c00 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Mon, 1 Jul 2024 11:17:31 +0530
Subject: [PATCH 3/5] Adds the env to the list of envs

---
 popgym/envs/__init__.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/popgym/envs/__init__.py b/popgym/envs/__init__.py
index 4df6d00..9a428d7 100644
--- a/popgym/envs/__init__.py
+++ b/popgym/envs/__init__.py
@@ -19,6 +19,12 @@
     BattleshipHard,
     BattleshipMedium,
 )
+from popgym.envs.carflag import (
+    CarFlag,
+    CarFlagEasy,
+    CarFlagHard,
+    CarFlagMedium,
+)
 from popgym.envs.concentration import (
     Concentration,
     ConcentrationEasy,
@@ -225,6 +231,7 @@
     Battleship: {"id": "popgym-Battleship-v0"},
     Concentration: {"id": "popgym-Concentration-v0"},
     MineSweeper: {"id": "popgym-MineSweeper-v0"},
+    CarFlag: {"id": "popgym-CarFlag-v0"},
 }
 
 GAME_EASY: Dict[gym.Env, Dict[str, Any]] = {
@@ -232,6 +239,7 @@
     BattleshipEasy: {"id": "popgym-BattleshipEasy-v0"},
     ConcentrationEasy: {"id": "popgym-ConcentrationEasy-v0"},
     MineSweeperEasy: {"id": "popgym-MineSweeperEasy-v0"},
+    CarFlagEasy: {"id": "popgym-CarFlagEasy-v0"},
 }
 
 GAME_MEDIUM: Dict[gym.Env, Dict[str, Any]] = {
@@ -239,6 +247,7 @@
     BattleshipMedium: {"id": "popgym-BattleshipMedium-v0"},
     ConcentrationMedium: {"id": "popgym-ConcentrationMedium-v0"},
     MineSweeperMedium: {"id": "popgym-MineSweeperMedium-v0"},
+    CarFlagMedium: {"id": "popgym-CarFlagMedium-v0"},
 }
 
 GAME_HARD: Dict[gym.Env, Dict[str, Any]] = {
@@ -246,6 +255,7 @@
     BattleshipHard: {"id": "popgym-BattleshipHard-v0"},
     ConcentrationHard: {"id": "popgym-ConcentrationHard-v0"},
     MineSweeperHard: {"id": "popgym-MineSweeperHard-v0"},
+    CarFlagHard: {"id": "popgym-CarFlagHard-v0"},
 }
 
 ALL_GAME = {**GAME_EASY, **GAME_MEDIUM, **GAME_HARD}

From c68b7cff969c394d0dc43f54924849e700ac7889 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Sun, 25 Aug 2024 22:28:34 +0530
Subject: [PATCH 4/5] replace done with terminated, truncated

---
 popgym/envs/carflag.py | 62 ++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index acad337..9e110ed 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -31,16 +31,20 @@ class CarFlag(POPGymEnv):
     """
 
     def __init__(self, discrete=True, difficulty="easy"):
-        assert difficulty in ["easy", "medium", "hard"]
-        if difficulty == "easy":
+        self.difficulty = difficulty
+        assert self.difficulty in ["easy", "medium", "hard"]
+        if self.difficulty == "easy":
             self.heaven_position = 1.0
             self.hell_position = -1.0
-        elif difficulty == "medium":
+            self.max_steps = 200
+        elif self.difficulty == "medium":
             self.heaven_position = 3.0
             self.hell_position = -3.0
-        elif difficulty == "hard":
+            self.max_steps = 300
+        elif self.difficulty == "hard":
             self.heaven_position = 5.0
             self.hell_position = -5.0
+            self.max_steps = 400
         else:
             raise NotImplementedError(f"Invalid difficulty {difficulty}")
         self.max_position = self.heaven_position + 0.1
@@ -50,14 +54,9 @@ def __init__(self, discrete=True, difficulty="easy"):
 
         self.min_action = -1.0
         self.max_action = 1.0
 
-        self.heaven_position = 1.0
-        self.hell_position = -1.0
         self.oracle_position = 0.5
         self.power = 0.0015
 
-        self.low_state = np.array([self.min_position, -self.max_speed])
-        self.high_state = np.array([self.max_position, self.max_speed])
-
         # When the cart is within this vicinity, it observes the direction given
         # by the oracle
         self.oracle_delta = 0.2
@@ -81,10 +80,18 @@ def __init__(self, discrete=True, difficulty="easy"):
             low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
         )
 
+        self.state_space = gym.spaces.Box(
+            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        )
+
         self.np_random = None
         self.state = None
 
+        self.current_step = 0
+
     def step(self, action):
+        self.current_step += 1
+
         position = self.state[0]
         velocity = self.state[1]
         if self.discrete:
@@ -105,7 +112,8 @@ def step(self, action):
         max_position = max(self.heaven_position, self.hell_position)
         min_position = min(self.heaven_position, self.hell_position)
 
-        done = bool(position >= max_position or position <= min_position)
+        terminated = bool(position >= max_position or position <= min_position)
+        truncated = bool(self.current_step >= self.max_steps)
 
         env_reward = 0
 
@@ -135,9 +143,19 @@ def step(self, action):
             # Heaven on the left
             direction = -1.0
 
+        position = np.clip(position, self.min_position, self.max_position)
+        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
+        direction = np.clip(direction, -1.0, 1.0)
+
         self.state = np.array([position, velocity, direction])
 
-        return self.state, env_reward, done, {"is_success": env_reward > 0}
+        return (
+            self.state,
+            env_reward,
+            terminated,
+            truncated,
+            {"is_success": env_reward > 0},
+        )
 
     def reset(
         self,
@@ -148,13 +166,29 @@ def reset(
         super().reset(seed=seed)
         # Randomize the heaven/hell location
        if self.np_random.integers(low=0, high=2, size=1) == 0:
-            self.heaven_position = 1.0
+            if self.difficulty == "easy":
+                self.heaven_position = 1.0
+            elif self.difficulty == "medium":
+                self.heaven_position = 3.0
+            elif self.difficulty == "hard":
+                self.heaven_position = 5.0
         else:
-            self.heaven_position = -1.0
+            if self.difficulty == "easy":
+                self.heaven_position = -1.0
+            elif self.difficulty == "medium":
+                self.heaven_position = -3.0
+            elif self.difficulty == "hard":
+                self.heaven_position = -5.0
 
         self.hell_position = -self.heaven_position
 
-        self.state = np.array([self.np_random.uniform(low=-0.2, high=0.2), 0, 0.0])
+        position = self.np_random.uniform(low=self.min_position, high=self.max_position)
+        velocity = 0.0
+        direction = 0.0
+
+        self.state = np.array([position, velocity, direction], dtype=np.float32)
+        self.current_step = 0  # Reset step counter
+
         return np.array(self.state), {}

From a537918dc007a5ef96c42effa49cb54a68e68cc1 Mon Sep 17 00:00:00 2001
From: Ashok Arora
Date: Tue, 27 Aug 2024 22:50:54 +0530
Subject: [PATCH 5/5] fix the state_space dim error

---
 popgym/envs/carflag.py | 35 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/popgym/envs/carflag.py b/popgym/envs/carflag.py
index 9e110ed..52cee2d 100644
--- a/popgym/envs/carflag.py
+++ b/popgym/envs/carflag.py
@@ -80,10 +80,18 @@ def __init__(self, discrete=True, difficulty="easy"):
             low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
         )
 
-        self.state_space = gym.spaces.Box(
-            low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
+        # Define the lower and upper bounds for the state space
+        low = np.array(
+            [self.min_position, -self.max_speed, -1.0, self.oracle_position, -5, -5],
+            dtype=np.float32,
+        )
+        high = np.array(
+            [self.max_position, self.max_speed, 1.0, self.oracle_position, 5, 5],
+            dtype=np.float32,
         )
 
+        self.state_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
+
         self.np_random = None
         self.state = None
 
@@ -143,11 +151,7 @@ def step(self, action):
             # Heaven on the left
             direction = -1.0
 
-        position = np.clip(position, self.min_position, self.max_position)
-        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
-        direction = np.clip(direction, -1.0, 1.0)
-
-        self.state = np.array([position, velocity, direction])
+        self.state = np.array([position, velocity, direction], dtype=np.float32)
 
         return (
             self.state,
@@ -189,15 +193,20 @@ def reset(
 
         self.state = np.array([position, velocity, direction], dtype=np.float32)
         self.current_step = 0  # Reset step counter
 
-        return np.array(self.state), {}
+        return self.state, {}
 
     def get_state(self):
         # Return the position of the car, oracle, and goal
-        return (
-            self.state,
-            self.oracle_position,
-            self.heaven_position,
-            self.hell_position,
+        return np.array(
+            [
+                self.state[0],
+                self.state[1],
+                self.state[2],
+                self.oracle_position,
+                self.heaven_position,
+                self.hell_position,
+            ],
+            dtype=np.float32,
        )
 
     def render(self):
         return None
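
With the full series applied, the new environment should be reachable
through the IDs registered in PATCH 3/5. A minimal end-to-end sketch
(assuming that importing popgym registers the IDs listed in its
registration dicts, as it does for the existing environments):

    import gymnasium as gym

    import popgym  # noqa: F401  # importing popgym registers the env IDs

    env = gym.make("popgym-CarFlagEasy-v0")
    obs, info = env.reset(seed=0)
    terminated = truncated = False
    while not (terminated or truncated):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
    print("is_success:", info["is_success"])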