Adds carflag-v0 env #34

Open
wants to merge 5 commits into base: master
Changes from all commits
10 changes: 10 additions & 0 deletions popgym/envs/__init__.py
@@ -19,6 +19,12 @@
BattleshipHard,
BattleshipMedium,
)
from popgym.envs.carflag import (
CarFlag,
CarFlagEasy,
CarFlagHard,
CarFlagMedium,
)
from popgym.envs.concentration import (
Concentration,
ConcentrationEasy,
@@ -225,27 +231,31 @@
Battleship: {"id": "popgym-Battleship-v0"},
Concentration: {"id": "popgym-Concentration-v0"},
MineSweeper: {"id": "popgym-MineSweeper-v0"},
CarFlag: {"id": "popgym-CarFlag-v0"},
}

GAME_EASY: Dict[gym.Env, Dict[str, Any]] = {
HigherLowerEasy: {"id": "popgym-HigherLowerEasy-v0"},
BattleshipEasy: {"id": "popgym-BattleshipEasy-v0"},
ConcentrationEasy: {"id": "popgym-ConcentrationEasy-v0"},
MineSweeperEasy: {"id": "popgym-MineSweeperEasy-v0"},
CarFlagEasy: {"id": "popgym-CarFlagEasy-v0"},
}

GAME_MEDIUM: Dict[gym.Env, Dict[str, Any]] = {
HigherLowerMedium: {"id": "popgym-HigherLowerMedium-v0"},
BattleshipMedium: {"id": "popgym-BattleshipMedium-v0"},
ConcentrationMedium: {"id": "popgym-ConcentrationMedium-v0"},
MineSweeperMedium: {"id": "popgym-MineSweeperMedium-v0"},
CarFlagMedium: {"id": "popgym-CarFlagMedium-v0"},
}

GAME_HARD: Dict[gym.Env, Dict[str, Any]] = {
HigherLowerHard: {"id": "popgym-HigherLowerHard-v0"},
BattleshipHard: {"id": "popgym-BattleshipHard-v0"},
ConcentrationHard: {"id": "popgym-ConcentrationHard-v0"},
MineSweeperHard: {"id": "popgym-MineSweeperHard-v0"},
CarFlagHard: {"id": "popgym-CarFlagHard-v0"},
}

ALL_GAME = {**GAME_EASY, **GAME_MEDIUM, **GAME_HARD}
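For context (not part of the diff): a minimal sketch of how the newly registered ID could be exercised, assuming popgym registers the entries of these dictionaries with Gymnasium on import, as it does for the existing environments.

import gymnasium as gym

import popgym  # noqa: F401  # assumption: importing popgym registers the popgym-* IDs

env = gym.make("popgym-CarFlagEasy-v0")
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())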
247 changes: 247 additions & 0 deletions popgym/envs/carflag.py
@@ -0,0 +1,247 @@
"""Car Flag tasks a car with driving across a 1D line to the correct flag.

The car must first drive to the oracle flag and then to the correct endpoint.
The agent's observation is a vector of three floats: its position on the line,
its velocity, and the direction of the goal flag, which is revealed only while
the car is near the oracle flag (and is zero otherwise). The agent's actions
alter its velocity: it can accelerate left, perform a no-op (maintain the
current velocity), or accelerate right."""

from typing import Any, Dict, Optional, Tuple

import gymnasium as gym
import numpy as np

from popgym.core.env import POPGymEnv


class CarFlag(POPGymEnv):
"""Car Flag tasks a car with driving across a 1D line to the correct flag.

The car must first drive to the oracle flag and then to the correct endpoint.
The agent's observation is a vector of three floats: its position on the line,
its velocity, and the direction of the goal flag, which is revealed only while
the car is near the oracle flag (and is zero otherwise). The agent's actions
alter its velocity: it can accelerate left, perform a no-op (maintain the
current velocity), or accelerate right.

Args:
discrete: If True, the action space is discrete (accelerate left, no-op,
accelerate right); otherwise it is a continuous acceleration in [-1, 1].
difficulty: One of "easy", "medium", or "hard"; sets the flag positions
and the episode length.

Returns:
A gym environment
"""

def __init__(self, discrete=True, difficulty="easy"):
self.difficulty = difficulty
assert self.difficulty in ["easy", "medium", "hard"]
if self.difficulty == "easy":
self.heaven_position = 1.0
self.hell_position = -1.0
self.max_steps = 200
elif self.difficulty == "medium":
self.heaven_position = 3.0
self.hell_position = -3.0
self.max_steps = 300
elif self.difficulty == "hard":
self.heaven_position = 5.0
self.hell_position = -5.0
self.max_steps = 400
else:
raise NotImplementedError(f"Invalid difficulty {difficulty}")
self.max_position = self.heaven_position + 0.1
self.min_position = -self.max_position
self.max_speed = 0.07

self.min_action = -1.0
self.max_action = 1.0

self.oracle_position = 0.5
self.power = 0.0015

# When the cart is within this vicinity, it observes the direction given
# by the oracle
self.oracle_delta = 0.2

self.low_state = np.array(
[self.min_position, -self.max_speed, -1.0], dtype=np.float32
)
self.high_state = np.array(
[self.max_position, self.max_speed, 1.0], dtype=np.float32
)

self.discrete = discrete

if self.discrete:
self.action_space = gym.spaces.Discrete(3)
else:
self.action_space = gym.spaces.Box(
low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
)
self.observation_space = gym.spaces.Box(
low=self.low_state, high=self.high_state, shape=(3,), dtype=np.float32
)

# Lower and upper bounds for the full (hidden) state returned by get_state():
# position, velocity, oracle direction, oracle position, heaven, hell
low = np.array(
[self.min_position, -self.max_speed, -1.0, self.oracle_position, -5, -5],
dtype=np.float32,
)
high = np.array(
[self.max_position, self.max_speed, 1.0, self.oracle_position, 5, 5],
dtype=np.float32,
)

self.state_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

self.np_random = None
self.state = None

self.current_step = 0

def step(self, action):
self.current_step += 1

position = self.state[0]
velocity = self.state[1]
if self.discrete:
# 0 is -1, 1 is 0, 2 is 1
force = action - 1
else:
force = np.clip(action, -1, 1)

velocity += force * self.power
velocity = min(velocity, self.max_speed)
velocity = max(velocity, -self.max_speed)
position += velocity
position = min(position, self.max_position)
position = max(position, self.min_position)
if position == self.min_position and velocity < 0:
velocity = 0

max_position = max(self.heaven_position, self.hell_position)
min_position = min(self.heaven_position, self.hell_position)

terminated = bool(position >= max_position or position <= min_position)
truncated = bool(self.current_step >= self.max_steps)

env_reward = 0

if self.heaven_position > self.hell_position:
if position >= self.heaven_position:
env_reward = 1.0

if position <= self.hell_position:
env_reward = -1.0

if self.heaven_position < self.hell_position:
if position <= self.heaven_position:
env_reward = 1.0

if position >= self.hell_position:
env_reward = -1.0

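# The oracle reveals which side heaven is on, but only while the car is
# within oracle_delta of the oracle flag; otherwise the direction reads 0.0.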
direction = 0.0
if (
position >= self.oracle_position - self.oracle_delta
and position <= self.oracle_position + self.oracle_delta
):
if self.heaven_position > self.hell_position:
# Heaven on the right
direction = 1.0
else:
# Heaven on the left
direction = -1.0

self.state = np.array([position, velocity, direction], dtype=np.float32)

return (
self.state,
env_reward,
terminated,
truncated,
{"is_success": env_reward > 0},
)

def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
) -> Tuple[gym.core.ObsType, Dict[str, Any]]:
super().reset(seed=seed)
# Randomize the heaven/hell location
if self.np_random.integers(low=0, high=2, size=1) == 0:
if self.difficulty == "easy":
self.heaven_position = 1.0
elif self.difficulty == "medium":
self.heaven_position = 3.0
elif self.difficulty == "hard":
self.heaven_position = 5.0
else:
if self.difficulty == "easy":
self.heaven_position = -1.0
elif self.difficulty == "medium":
self.heaven_position = -3.0
elif self.difficulty == "hard":
self.heaven_position = -5.0

self.hell_position = -self.heaven_position

position = self.np_random.uniform(low=self.min_position, high=self.max_position)
velocity = 0.0
direction = 0.0

self.state = np.array([position, velocity, direction], dtype=np.float32)
self.current_step = 0 # Reset step counter

return self.state, {}

def get_state(self):
# Return the position of the car, oracle, and goal
return np.array(
[
self.state[0],
self.state[1],
self.state[2],
self.oracle_position,
self.heaven_position,
self.hell_position,
],
dtype=np.float32,
)

def render(self):
return None


if __name__ == "__main__":
    e = CarFlag()
    obs, info = e.reset()
    e.render()
    done = False
    while not done:
        action = int(input("Enter action (0=left, 1=no-op, 2=right): "))
        obs, reward, terminated, truncated, info = e.step(action)
        done = terminated or truncated
        print(f"reward = {reward}")


class CarFlagEasy(CarFlag):
"""Car Flag tasks a car with driving across a 1D line to the correct flag.
The easy level has the range [-1, 1]."""

def __init__(self):
super().__init__("easy")


class CarFlagMedium(CarFlag):
"""Car Flag tasks a car with driving across a 1D line to the correct flag.
The medium level has the range [-3, 3]."""

def __init__(self):
super().__init__("medium")


class CarFlagHard(CarFlag):
"""Car Flag tasks a car with driving across a 1D line to the correct flag.
The hard level has the range [-5, 5]."""

def __init__(self):
super().__init__("hard")