This repository has been archived by the owner on Jan 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathsimulator.h
447 lines (350 loc) · 13.2 KB
/
simulator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
// Copyright (c) 2017 Baidu Inc. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gflags/gflags.h>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <boost/python.hpp>
#include "data_packet.h"
#include "simulator_util.h"
#include "simulator_entity.h"
// Compatibility shim: when GFLAGS_GFLAGS_H_ is not defined, make `gflags` an
// alias of namespace `google`, so code can always spell gflags:: regardless of
// which namespace the installed library uses.
// NOTE(review): the original comment attributed this to old versions of
// "glog"; the alias concerns the gflags namespace, so it presumably meant
// old gflags releases -- confirm.
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
// Command-line flags referenced by the simulators. They are only declared
// here; the matching definitions are not part of this header.
DECLARE_int32(context);       // number of most recent screens kept in GameSimulator::screens_
DECLARE_int32(max_steps);     // step limit checked by GameSimulator::game_over(); values <= 0 disable the check
DECLARE_bool(lock_step);      // NOTE(review): meaning not visible in this header
DECLARE_bool(color);          // NOTE(review): meaning not visible in this header
DECLARE_bool(pause_screen);   // NOTE(review): meaning not visible in this header
namespace simulator {
/**
 * @brief All possible game statuses.
 *
 * ALIVE is zero; every other status occupies its own distinct bit.
 */
enum GameOverCode {
    ALIVE = 0x0,      // normal state, the game goes on
    MAX_STEP = 0x1,   // the maximum number of steps has been reached
    DEAD = 0x2,       // all lives have been lost
    SUCCESS = 0x4,    // the game has been completed successfully
    LOST_LIFE = 0x8   // one life has been lost
};
// A frame represented as a flat sequence of bytes.
using GameFrame = std::vector<uint8_t>;
/**
 * @brief Common base class of the game simulators.
 *
 * Declares the per-game hooks (actions, lives, screen access, state layout)
 * that concrete simulators implement, and holds the data shared by all of
 * them: the step counter, the last action and the context screens.
 */
class GameSimulator {
public:
    GameSimulator();
    virtual ~GameSimulator() {}

    /**
     * @brief Reset the game.
     *
     * Called every time a new game begins.
     */
    virtual void reset_game();

    /**
     * @brief Report the game status.
     *
     * @return MAX_STEP when FLAGS_max_steps is positive and num_steps_ has
     *         reached it; ALIVE otherwise.
     */
    virtual int game_over() {
        const bool step_limit_hit =
            FLAGS_max_steps > 0 && num_steps_ >= FLAGS_max_steps;
        return step_limit_hit ? MAX_STEP : ALIVE;
    }

    /**
     * @brief Number of actions the game allows.
     */
    virtual int get_num_actions() = 0;

    /**
     * @brief Number of lives.
     */
    virtual int get_lives() = 0;

    /**
     * @brief Visualize the current screen together with the given reward.
     *
     * The reward can be shown on the screen.
     */
    virtual void show_screen(float reward) = 0;

    /**
     * @brief Take the single-step action represented by `actions`.
     *
     * @param[in] actions    action to take
     * @return reward obtained by taking the action
     */
    virtual float take_action(const StatePacket& actions) = 0;

    /**
     * @brief Get the current screen.
     *
     * The screen is returned as a StatePacket rather than a GameFrame because
     * some games have floating-point screens.
     */
    virtual void get_screen(StatePacket& screen) = 0;

    /**
     * @brief Clear screens_ and initialize it from the current screen.
     */
    void init_screen();

    /**
     * @brief Update screens_: put the current screen at the beginning and
     *        drop the oldest one.
     */
    void make_context_screens();

    /**
     * @brief Pack all info into a StatePacket.
     *
     * Derived simulators override `define_state_specs` to set up the state
     * structure they need and to fill in every value except "reward" and
     * "screen"; those two are handled by this function.
     *
     * Consecutive calls of this function with no call of `take_actions` in
     * between should return the same state.
     *
     * @param[in]  reward    the reward received when entering current state
     * @param[out] state     state with all info
     */
    void get_state_data(float reward, StatePacket& state) {
        // Game-specific part first ...
        define_state_specs(state);
        // ... then the part every simulator shares: "reward" and "screen".
        fill_in_reward_and_screen(reward, state);
    }

    /**
     * @brief Specify what types of information are needed in the state.
     *
     * TODO: Ideally this function should not be public, but
     * AgentSpecificSimulator::define_state_specs needs to reach it from
     * outside, so it is publicly accessible.
     */
    virtual void define_state_specs(StatePacket& state) = 0;

    /**
     * @brief Extra information for debugging or displaying (at each time
     *        step).
     *
     * Derived classes should override this to provide game-specific extra
     * info. Usually this info shouldn't be used by the agent for learning,
     * which is why it is delivered as a plain string. The base implementation
     * yields an empty string.
     */
    virtual void get_extra_info(std::string& info) { info.clear(); }

    /**
     * @brief Repeatedly take `actions` for `actrep` times and return the
     *        cumulative reward after taking those actions.
     *
     * screens_ will be updated after taking actions.
     *
     * NOTE(review): `show_screen` and `acc_reward` are not described in this
     * header; presumably a display toggle and the reward accumulated so far
     * -- confirm against the definition.
     */
    float take_actions(const StatePacket& actions,
                       int actrep,
                       bool show_screen,
                       float acc_reward);

    // Value of the step counter num_steps_.
    int64_t get_num_steps() { return num_steps_; }

    // The stored last action of the agent.
    virtual std::string last_action() { return last_action_; }

    // Whether the stored last action was successful.
    virtual bool last_action_success() { return last_action_success_; }

    // "reward" and "screen" are the two basic buffers.
    static const int N_BUFFERS = 2;

    /**
     * @brief Get the image dimensions used by CNN.
     */
    virtual void get_screen_out_dimensions(size_t& height,
                                           size_t& width,
                                           size_t& channels) = 0;

    static std::string decode_game_over_code(int code);

protected:
    // number of steps since the beginning of the game
    int64_t num_steps_;
    // the last action, stored for the agent
    std::string last_action_;
    // whether the last action was successful or not
    bool last_action_success_;
    // the most recent FLAGS_context screens
    StatePacket screens_;
    // Global mutex serializing calls of cv::imshow, which is not thread-safe.
    static std::mutex s_display_mutex_;

    /**
     * @brief Return the id of the thread that calls `show_screen`, used as
     *        the screen name.
     *
     * With multiple concurrent games, thread ids identify the imshow windows.
     */
    std::string get_screen_name();

    /**
     * @brief Allocate space for screens_.
     */
    void init_context_screens(bool is_uint8);

    /**
     * @brief Shift the context to the right by one screen of size screen_sz.
     *
     * Templated so that screens of both uint8_t and float types are
     * supported.
     *
     * @param[in]  cur_buf
     * @param[out] context_buf
     * @param[in]  n_contexts
     * @param[in]  screen_sz
     */
    template <typename T>
    void shift_context(BufferPtr cur_buf,
                       BufferPtr context_buf,
                       int n_contexts,
                       size_t screen_sz);

    /**
     * @brief Fill in the "reward" and "screen" entries of `state`.
     *
     * @param[in]  reward    reward
     * @param[out] state     state to fill
     */
    void fill_in_reward_and_screen(float reward, StatePacket& state);
};
// Shared-ownership handle to a GameSimulator.
using SimulatorPtr = std::shared_ptr<GameSimulator>;
/**
 * @brief Interface of a simulator that supports multiple agents.
 *
 * Keeps a count of the agents added so far and the id of the agent that is
 * currently active.
 */
class GameSimulatorMulti : public GameSimulator {
public:
    GameSimulatorMulti() : num_agents_(0), active_agent_id_(0) {}
    virtual ~GameSimulatorMulti() {}

    // Select the agent that is currently active.
    virtual void set_active_agent_id(int agent_id) {
        active_agent_id_ = agent_id;
    }

    // Register one more agent and return the id of the newly added agent,
    // which is the agent count before the addition.
    virtual int add_agent() {
        const int new_agent_id = num_agents_;
        ++num_agents_;
        return new_agent_id;
    }

    // Called to hand max_steps (FLAGS_max_steps) to AgentSpecificSimulator.
    int64_t get_max_steps() { return FLAGS_max_steps; }

protected:
    int num_agents_;
    int active_agent_id_;
};
// Shared-ownership handle to a GameSimulatorMulti.
using SimulatorMultiPtr = std::shared_ptr<GameSimulatorMulti>;
// A buffer that stores the communication between agent and teacher,
// coordinated by the teaching environment.
// It is SHARED by all the tasks; a teacher only has one buffer.
//
// Every scalar field carries a default member initializer equal to its
// "cleared" value, so a freshly constructed buffer is in the same state as
// one on which both clear_*() functions have been called (previously the
// scalars were left indeterminate until the first clear).
struct TeachingEnvBuffer {
    ///////// teacher's buffer /////////
    std::string teacher_sent;
    std::string teacher_sent_type;  // for display only; not for agent
    double reward = 0;
    std::string event;  // stores an event's name during the session
                        // e.g., "correct_goal", "wrong_goal", "hit_wall"
                        // can be used by the environment to decide game_over()

    ///////// agent's buffer ////////
    std::string agent_sent;
    int agent_action = -1;  // -1 is the "cleared" value
    bool agent_action_successful = false;

    // Reset the teacher's half: empty strings and zero reward.
    // The agent's half is left untouched.
    void clear_teacher_env_buffer() {
        teacher_sent = "";
        teacher_sent_type = "";
        reward = 0;
        event = "";
    }

    // Reset the agent's half: empty sentence, action -1, not successful.
    // The teacher's half is left untouched.
    void clear_agent_env_buffer() {
        agent_sent = "";
        agent_action = -1;
        agent_action_successful = false;
    }
};
/**
 * @brief Environment that mediates between a teacher and an agent through a
 *        single TeachingEnvBuffer.
 *
 * TeachingEnvironment only supports single sentence.
 */
class TeachingEnvironment {
public:
    TeachingEnvironment()
        : beginning_(true), num_games_since_simulation_(0) {
        // Start from a fully cleared buffer so that add_teacher_reward()
        // never accumulates onto an unset value and the agent fields hold
        // their "cleared" defaults from the very first read.
        buffer_.clear_teacher_env_buffer();
        buffer_.clear_agent_env_buffer();
    }

    virtual ~TeachingEnvironment() {}

    // True while no teacher sentence is stored in the buffer.
    bool can_record_teacher_sent_in_buffer() const {
        return buffer_.teacher_sent.empty();
    }

    // Store the teacher's sentence, replacing any previous one.
    void record_teacher_sent_in_buffer(const std::string& sentence) {
        buffer_.teacher_sent = sentence;
    }

    const std::string& get_teacher_sent_from_buffer() const {
        return buffer_.teacher_sent;
    }

    // Store the type of the teacher's sentence.
    void record_teacher_sent_type_in_buffer(const std::string& type) {
        buffer_.teacher_sent_type = type;
    }

    const std::string& get_teacher_sent_type_from_buffer() const {
        return buffer_.teacher_sent_type;
    }

    // Add `reward` to the reward accumulated in the buffer.
    void add_teacher_reward(double reward) { buffer_.reward += reward; }

    double get_teacher_reward() const { return buffer_.reward; }

    // Store an event name, replacing any previous one.
    void record_event_in_buffer(const std::string& event) {
        buffer_.event = event;
    }

    const std::string& get_event_from_buffer() const { return buffer_.event; }

    // Store the agent's sentence, replacing any previous one.
    void record_agent_sent_in_buffer(const std::string& sentence) {
        buffer_.agent_sent = sentence;
    }

    const std::string& get_agent_sent_from_buffer() const {
        return buffer_.agent_sent;
    }

    // Store whether the agent's action was successful.
    void record_agent_action_successful_in_buffer(bool successful) {
        buffer_.agent_action_successful = successful;
    }

    bool get_agent_action_successful_from_buffer() const {
        return buffer_.agent_action_successful;
    }

    // Store the agent's action.
    void record_agent_action_in_buffer(int action) {
        buffer_.agent_action = action;
    }

    int get_agent_action_from_buffer() const { return buffer_.agent_action; }

    // Some events can only be obtained from cpp end (e.g., collisions in
    // XWorld3d), so we need this function to send such events to python end.
    // The base implementation reports no events (empty string).
    virtual std::string get_events_of_game() {
        return std::string("");
    }

    // get all the entities in the current game
    virtual void get_all_entities(std::vector<Entity>& entities) = 0;

    // get the python environment
    virtual boost::python::object get_py_env() = 0;

    // update the environment by the teacher's changes
    virtual void update_environment() = 0;

    // The linking node between the teacher and the agent
    virtual void apply_teacher_actions() = 0;

    // get the dimensions of the world
    virtual void get_world_dimensions(double& X, double& Y, double& Z) = 0;

    // Mark the start of a new game and count it.
    virtual void reset_game() {
        beginning_ = true;
        num_games_since_simulation_++;
    }

    // Record that an action has been taken since the game started.
    // Always returns 0.
    float take_action() {
        beginning_ = false;
        // clear teacher's buffer after agent takes an action
        clear_teacher_env_buffer();
        return 0;
    }

    void clear_agent_env_buffer() { buffer_.clear_agent_env_buffer(); }

    void clear_teacher_env_buffer() { buffer_.clear_teacher_env_buffer(); }

    // True until take_action() has been called after the latest
    // construction or reset_game().
    bool beginning_of_the_game() { return beginning_; }

    // Number of reset_game() calls so far.
    int num_games_since_simulation() { return num_games_since_simulation_; }

protected:
    bool beginning_;  // whether any agent has taken action since the game starts
    int num_games_since_simulation_;  // how many games have been played
    TeachingEnvBuffer
        buffer_;  // a buffer to store exchanging information between
                  // the teacher and the agent
};
// Shared-ownership handle to a TeachingEnvironment.
using TeachingEnvPtr = std::shared_ptr<TeachingEnvironment>;
/**
class that implements a agent-specific interface from a multi-agent simulator
by maintaining the agent_id and SimulatorPtr
*/
class AgentSpecificSimulator : public GameSimulator {
public:
AgentSpecificSimulator(SimulatorMultiPtr simulator_ptr, int agent_id = 0);
void reset_game() override;
int game_over() override;
int get_num_actions() override;
int get_lives() override;
void show_screen(float reward) override;
std::string last_action() override { return simulator_ptr_->last_action(); }
float take_action(const StatePacket& actions) override;
bool last_action_success() override { return simulator_ptr_->last_action_success(); }
void get_screen(StatePacket& screen) override;
void get_extra_info(std::string& info) override;
void get_screen_out_dimensions(size_t& height,
size_t& width,
size_t& channels) override;
void define_state_specs(StatePacket& state);
private:
void activate_my_agent() { simulator_ptr_->set_active_agent_id(agent_id_); }
SimulatorMultiPtr simulator_ptr_;
const int agent_id_;
};
} // namespace simulator