Skip to content

Commit

Permalink
progress on rl ai implementation + BUG
Browse files Browse the repository at this point in the history
Bug when playing numeral cards: e.g. playing P1A on the first move is correct, but the same card can also be played as P1A1, P1A2, etc., as though the numeral card P1 were a face card. This causes issues for the RL training because it randomly selects one of these erroneous P options. This still needs to be worked on.
  • Loading branch information
r3w0p committed Jan 3, 2025
1 parent 53312d3 commit b808e0a
Show file tree
Hide file tree
Showing 13 changed files with 442 additions and 447 deletions.
1 change: 0 additions & 1 deletion .github/FUNDING.yml

This file was deleted.

2 changes: 1 addition & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
| (_| (_| | | | (_| |\ \/ / (_| | | | |
\___\__,_|_| \__,_| \__/ \__,_|_| |_|

| v2.0.0 | GPL-3.0 | (c) 2022-2024 r3w0p |
| v2.0.0 | GPL-3.0 | (c) 2022-2025 r3w0p |

A command-line version of the Caravan card game from Fallout: New Vegas.

Expand Down
14 changes: 7 additions & 7 deletions include/caravan/model/game.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
#define CARAVAN_MODEL_GAME_H

#include <cstdint>
#include <memory>

#include "caravan/model/table.h"
#include "caravan/model/player.h"
#include "caravan/core/exceptions.h"

class Game {
protected:
Table *table_ptr{};
Player *pa_ptr{};
Player *pb_ptr{};
Player *p_turn;
std::unique_ptr<Table> table;
std::unique_ptr<Player> player_a;
std::unique_ptr<Player> player_b;
Player *player_turn;

int8_t compare_bids(CaravanName cvname1, CaravanName cvname2);

Expand All @@ -33,12 +35,10 @@ class Game {
/**
* @param config Game configuration.
*
* @throws CaravanFatalException Invalid player names.
* @throws CaravanFatalException Invalid first player in game configuration.
*/
explicit Game(GameConfig *gc);

~Game();

static CaravanName get_opposite_caravan_name(CaravanName cvname);

Player *get_player(PlayerName pname);
Expand Down
6 changes: 3 additions & 3 deletions include/caravan/model/player.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@
#define CARAVAN_MODEL_PLAYER_H

#include <array>
#include <memory>

#include "caravan/model/deck.h"


class Player {
protected:
PlayerName name;
Deck *deck;
std::unique_ptr<Deck> deck;
Hand hand;
uint8_t i_hand;
uint16_t moves;

public:
explicit Player(PlayerName pn, Deck *d);

~Player();

Card get_from_hand_at(uint8_t pos);

Hand get_hand();
Expand Down
24 changes: 15 additions & 9 deletions include/caravan/model/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,30 @@

#include <array>
#include <cstdint>
#include <memory>
#include "caravan/model/caravan.h"

class Table {
protected:
Caravan *a = new Caravan(CARAVAN_A);
Caravan *b = new Caravan(CARAVAN_B);
Caravan *c = new Caravan(CARAVAN_C);
Caravan *d = new Caravan(CARAVAN_D);
Caravan *e = new Caravan(CARAVAN_E);
Caravan *f = new Caravan(CARAVAN_F);
std::unique_ptr<Caravan> caravan_a = std::make_unique<Caravan>(CARAVAN_A);
std::unique_ptr<Caravan> caravan_b = std::make_unique<Caravan>(CARAVAN_B);
std::unique_ptr<Caravan> caravan_c = std::make_unique<Caravan>(CARAVAN_C);
std::unique_ptr<Caravan> caravan_d = std::make_unique<Caravan>(CARAVAN_D);
std::unique_ptr<Caravan> caravan_e = std::make_unique<Caravan>(CARAVAN_E);
std::unique_ptr<Caravan> caravan_f = std::make_unique<Caravan>(CARAVAN_F);

std::array<Caravan *, TABLE_CARAVANS_MAX> caravans = {a, b, c, d, e, f};
std::array<Caravan *, TABLE_CARAVANS_MAX> caravans = {
caravan_a.get(),
caravan_b.get(),
caravan_c.get(),
caravan_d.get(),
caravan_e.get(),
caravan_f.get()
};

public:
explicit Table() = default;

~Table();

/**
* @param cvname The caravan to get.
* @return Pointer to the caravan.
Expand Down
142 changes: 75 additions & 67 deletions src/caravan/core/training.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,93 +120,101 @@ void populate_action_space(ActionSpace *as) {
}

/**
 * Drives a game by repeatedly choosing an action string, turning it into a
 * command with generate_command(), validating it with game->check_option()
 * and playing it with game->play_option().
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so
 * statements from the removed and the added version of the function are
 * interleaved below. The notes added here describe individual statements
 * only; confirm against the real file before relying on the overall flow.
 *
 * @param game Game to play; queried for the current player and the winner.
 * @param q_table Table indexed as q_table[state][action]; entries for a
 *                state not yet contained are set to 0 for every action.
 * @param action_space Action strings, indexed 0 .. SIZE_ACTION_SPACE - 1.
 * @param tc Training configuration; tc.explore is compared with a uniform
 *           sample from dist_explore(0, 1) to decide whether to explore.
 * @param gen Random engine used for all sampling in this function.
 */
void train_on_game(Game *game, QTable &q_table, ActionSpace &action_space, TrainConfig &tc, std::mt19937 &gen) {
// Get player names
PlayerName pturn = game->get_player_turn();
PlayerName popp = pturn == PLAYER_ABC ? PLAYER_DEF : PLAYER_ABC;

// Read game state and maybe add to the q-table if state not present
GameState gs;
get_game_state(&gs, game, pturn);

if (!q_table.contains(gs)) {
// TODO maybe is this needed: q_table[gs] = {};
for (uint16_t i = 0; i < SIZE_ACTION_SPACE; i++) {
q_table[gs][action_space[i]] = 0;
}
}

// Choose an action
std::string action;
uint16_t action_index;
GameCommand command;
std::vector<std::string> invalid;

std::uniform_int_distribution<uint16_t> dist_action(0, SIZE_ACTION_SPACE - 1);
std::uniform_real_distribution<float> dist_explore(0, 1);
bool explore = dist_explore(gen) < tc.explore;

while (true) {
if (explore) {
// If exploring, fetch a random action from the action space
action = action_space[dist_action(gen)];
// Play until winner
while (game->get_winner() == NO_PLAYER) {
// Determine player
PlayerName pturn = game->get_player_turn();
PlayerName popp = pturn == PLAYER_ABC ? PLAYER_DEF : PLAYER_ABC;

} else {
// Otherwise, pick the optimal action from the q-table
uint16_t pick_index = 0;
float pick_value = q_table[gs][action_space[pick_index]];

for (uint16_t i_action = 1; i_action < SIZE_ACTION_SPACE; i_action++) {
// Ignore if already found to be invalid
if (std::count(invalid.begin(), invalid.end(), action_space[i_action]) > 0) {
continue;
}
// Get game state in relation to current player
get_game_state(&gs, game, pturn);

if (q_table[gs][action_space[i_action]] > pick_value) {
pick_index = i_action;
pick_value = q_table[gs][action_space[pick_index]];
}
// Maybe add game state and actions if new state discovered
if (!q_table.contains(gs)) {
// TODO maybe is this needed: q_table[gs] = {};
for (uint16_t i = 0; i < SIZE_ACTION_SPACE; i++) {
q_table[gs][action_space[i]] = 0;
}

action = action_space[pick_index];
}

// If action in invalid list, ignore it and try another
if (std::count(invalid.begin(), invalid.end(), action) > 0) {
continue;
// Use action pool that depletes as actions are found to be invalid
std::vector<std::string> action_pool;
for (int i = 0; i < SIZE_ACTION_SPACE; i++) {
action_pool.push_back(action_space[i]);
}

// Generate command for action
command = generate_command(action, true);
// Find a valid action
while (true) {
if (explore) {
// If exploring, fetch a random action from the action pool
// NOTE(review): if every action has been erased from the pool,
// action_pool.size() - 1 wraps around (size() is unsigned), so the
// distribution bound and the index below are no longer valid.
std::uniform_int_distribution<uint16_t> dist_pool(0, action_pool.size() - 1);
action_index = dist_pool(gen);
action = action_pool[action_index];

} else {
// Otherwise, pick the optimal action from the q-table
action_index = 0;
float action_value = q_table[gs][action_pool[action_index]];

for (uint16_t i_action = 1; i_action < action_pool.size(); i_action++) {
// Change pick if next action has greater value
if (q_table[gs][action_pool[i_action]] > action_value) {
action_index = i_action;
action_value = q_table[gs][action_pool[action_index]];
}
}

action = action_pool[action_index];
}

// Generate command for action
command = generate_command(action, true);

// Check action is valid
if (!game->check_option(&command)) {
// If invalid action for state, add to invalid list and try again
invalid.push_back(action);
continue;
// TODO error if all actions are invalid? should not be possible!

} else {
// Pick action
break;
// Use action if valid
if (game->check_option(&command)) break;

// Remove action from pool and try again
action_pool.erase(action_pool.begin() + action_index);
}
}

// Perform action
// (Exceptions intentionally not handled)
game->play_option(&command);
// NOTE(review): action_pool.size() returns std::size_t, for which the
// portable printf conversion is %zu; %llu only matches where size_t is
// unsigned long long.
printf("[%s] %s (%hu, %llu)\n",
pturn == PLAYER_ABC ? "ABC" : "DEF",
action.c_str(),
action_index,
action_pool.size());

// Measure reward (1 = win, -1 = loss, 0 = neither)
// NOTE(review): reward is unsigned, so the assignment of -1 below stores
// 65535 rather than a negative value; a signed type looks intended.
uint16_t reward;
// Perform action
// (Exceptions intentionally not handled)
game->play_option(&command);

if (game->get_winner() == pturn) {
reward = 1;
} else if (game->get_winner() == popp) {
reward = -1;
} else {
reward = 0;
}
/*
// Measure reward (1 = win, -1 = loss, 0 = neither)
uint16_t reward;
if (game->get_winner() == pturn) {
reward = 1;
} else if (game->get_winner() == popp) {
reward = -1;
} else {
reward = 0;
}
// TODO update q_table
float q_value_former = q_table[gs][action];
GameState gs_new;
get_game_state(&gs_new, game, pturn);
// if a winner: +1 for winning player, -1 for losing player
// TODO update q_table
// float q_value_former = q_table[gs][action];
// GameState gs_new;
// get_game_state(&gs_new, game, pturn);
// if a winner: +1 for winning player, -1 for losing player
*/
}
}
78 changes: 39 additions & 39 deletions src/caravan/model/caravan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,6 @@
#include "caravan/core/exceptions.h"


/*
* PROTECTED
*/

/**
 * Translate a numeral rank into its numeric value.
 *
 * @param rank Rank to convert; must be one of ACE through TEN.
 * @return 1 for ACE, 2 for TWO, and so on up to 10 for TEN.
 *
 * @throws CaravanFatalException Rank is not one of ACE through TEN.
 */
uint8_t Caravan::numeral_rank_to_uint8_t(Rank rank) {
    uint8_t value;

    // Single exit point: each numeral rank selects its value, anything
    // else is rejected before the return is reached.
    switch (rank) {
        case ACE:   value = 1;  break;
        case TWO:   value = 2;  break;
        case THREE: value = 3;  break;
        case FOUR:  value = 4;  break;
        case FIVE:  value = 5;  break;
        case SIX:   value = 6;  break;
        case SEVEN: value = 7;  break;
        case EIGHT: value = 8;  break;
        case NINE:  value = 9;  break;
        case TEN:   value = 10; break;
        default:
            throw CaravanFatalException("Invalid rank.");
    }

    return value;
}

/**
 * Remove the track entry at the given index by shifting every later entry
 * one slot towards the front, then shrink the track by one.
 *
 * @param index Position in the track of the entry to remove.
 */
void Caravan::remove_numeral_card(uint8_t index) {
    uint8_t slot = index;

    // Overwrite each slot with its successor until the last used slot.
    while ((slot + 1) < i_track) {
        track[slot] = track[slot + 1];
        ++slot;
    }

    --i_track;
}

/*
* PUBLIC
*/
Expand Down Expand Up @@ -373,3 +334,42 @@ bool Caravan::remove_suit(Suit suit, uint8_t pos_exclude, bool check_only) {

return true;
}

/*
* PROTECTED
*/

/**
 * Translate a numeral rank into its numeric value.
 *
 * @param rank Rank to convert; must be one of ACE through TEN.
 * @return 1 for ACE, 2 for TWO, and so on up to 10 for TEN.
 *
 * @throws CaravanFatalException Rank is not one of ACE through TEN.
 */
uint8_t Caravan::numeral_rank_to_uint8_t(Rank rank) {
    uint8_t value;

    // Single exit point: each numeral rank selects its value, anything
    // else is rejected before the return is reached.
    switch (rank) {
        case ACE:   value = 1;  break;
        case TWO:   value = 2;  break;
        case THREE: value = 3;  break;
        case FOUR:  value = 4;  break;
        case FIVE:  value = 5;  break;
        case SIX:   value = 6;  break;
        case SEVEN: value = 7;  break;
        case EIGHT: value = 8;  break;
        case NINE:  value = 9;  break;
        case TEN:   value = 10; break;
        default:
            throw CaravanFatalException("Invalid rank.");
    }

    return value;
}

/**
 * Remove the track entry at the given index by shifting every later entry
 * one slot towards the front, then shrink the track by one.
 *
 * @param index Position in the track of the entry to remove.
 */
void Caravan::remove_numeral_card(uint8_t index) {
    uint8_t slot = index;

    // Overwrite each slot with its successor until the last used slot.
    while ((slot + 1) < i_track) {
        track[slot] = track[slot + 1];
        ++slot;
    }

    --i_track;
}
Loading

0 comments on commit b808e0a

Please sign in to comment.