From d9302933b2a43ac4e9dc5c985c9de068a0e210de Mon Sep 17 00:00:00 2001
From: Daniel Derycke <104598708+DHDev0@users.noreply.github.com>
Date: Thu, 19 Jan 2023 02:31:59 -0500
Subject: [PATCH] manage illegal move

---
 README.md |  1 +
 game.py   | 24 ++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2e693e3..c11559b 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,7 @@ Core Muzero feature:
 * [x] MCTS with 0 simulation (use of prior) or any number of simulation.
 * [x] Model weights automatically saved at best selfplay average reward.
 * [x] Priority or Uniform for sampling in replay buffer.
+* [X] Manage illegal move with negative reward.
 * [X] Scale the loss using the importance sampling ratio.
 * [x] Custom "Loss function" class to apply transformation and loss on label/prediction.
 * [X] Load your pretrained model from tag number.
diff --git a/game.py b/game.py
index b401751..574eab1 100644
--- a/game.py
+++ b/game.py
@@ -117,9 +117,29 @@ def observation(self,observation_shape=None,
         #manage feedback observation
         else:
             state = feedback[0]
-
+        self.feedback_state = state
         return state

+    def step(self,action):
+        """Apply ``action`` to ``self.env``; penalise a move the env rejects.
+
+        On success the env's own step output is returned unchanged. If the
+        env raises, return ``(observation, reward, done)`` built from
+        ``self.feedback_state``, a reward of at most -1 and ``self.done``.
+        """
+        try:
+            next_step = (self.env.step(action))
+        except Exception:
+            # Not a bare ``except``: KeyboardInterrupt/SystemExit must still
+            # propagate instead of being scored as an illegal move.
+            obs = self.feedback_state
+            reward = min(-len(self.rewards),-self.limit_of_game_play,-1)
+            done = self.done
+            # NOTE(review): only 3 fields here, fewer than the env's step
+            # output; confirm callers read just observation, reward, done.
+            next_step = (obs,reward,done)
+        return next_step
+
     def close(self):
         return self.env.close()

@@ -229,7 +249,7 @@ def policy_step(self, root = None , temperature = 0 , feedback = None, iteration
         # # # apply mouve and return variable of the env
         # # # save game variable to a list to return them
         #contain [observation, reward, done, info] + [meta_data for som gym env]
-        step_output = (self.env.step(self.action_map[selected_action]))
+        step_output = self.step(self.action_map[selected_action])

         #Get the new observation generate by step
         if self.rgb_observation :