Problems that I'll fix another time...

- Training doesn't start with a 50% win rate despite both players making purely random moves.
- Need to implement Jack, Queen, King, and Joker.
- Random vs. policy: the policy is broken and needs revising anyway. It needs to play a policy move as long as there's a good one to play: exhaust the known ones, then try random?
r3w0p · Jan 7, 2025 · ffa6d73 · ffa6d73
1 parent fddcad1
commit ffa6d73
Show file tree

Hide file tree

Showing 5 changed files with 112 additions and 25 deletions.
diff --git a/include/caravan/core/common.h b/include/caravan/core/common.h
@@ -137,7 +137,9 @@ bool is_face_card(Card c);
 
 std::string caravan_letter(CaravanName caravan_name);
 
-uint8_t numeral_rank_value(Card c);
+uint8_t numeral_rank_value(const Card &c);
+
+uint8_t slot_value(const Slot &s);
 
 GameCommand generate_command(std::string input, bool confirmed);
 

diff --git a/include/caravan/core/training.h b/include/caravan/core/training.h
@@ -36,7 +36,7 @@ const uint8_t ACTION_PLAY_NUMERAL_1 = 30;
 const uint8_t ACTION_PLAY_NUMERAL_2 = 31;
 const uint8_t ACTION_PLAY_NUMERAL_3 = 32;
 
-const uint8_t ACTION_PLAY_JACK_SELF = 40;  // on lowest bust
+const uint8_t ACTION_PLAY_JACK_SELF = 40;
 const uint8_t ACTION_PLAY_JACK_OPP = 41;  // on highest non-bust
 
 const uint8_t ACTION_PLAY_QUEEN_SELF = 50;  // on LOW+DES, HIGH+ASC, light

diff --git a/src/caravan/core/common.cpp b/src/caravan/core/common.cpp
@@ -274,7 +274,7 @@ std::string caravan_letter(CaravanName caravan_name) {
     }
 }
 
-uint8_t numeral_rank_value(Card c) {
+uint8_t numeral_rank_value(const Card &c) {
     switch (c.rank) {
         case ACE:
             return 1;
@@ -299,4 +299,17 @@ uint8_t numeral_rank_value(Card c) {
         default:
             throw CaravanFatalException("Card is not a numeral.");
     }
-}
+}
+
+/**
+ * Compute the value of a caravan slot.
+ *
+ * The value starts as the numeral value of the slot's card and is doubled
+ * once for every KING found among the first `i_faces` face cards of the slot.
+ *
+ * @param s Slot to evaluate.
+ * @return Value of the slot's card after applying each KING.
+ *
+ * @throws CaravanFatalException (via numeral_rank_value) if the slot's card
+ *         is not a numeral.
+ */
+uint8_t slot_value(const Slot &s) {
+    // Start from the plain numeral value of the card in the slot
+    uint8_t total = numeral_rank_value(s.card);
+
+    for (int idx = 0; idx < s.i_faces; ++idx) {
+        // Only a KING changes the value of the slot
+        if (s.faces[idx].rank != KING) continue;
+
+        total *= 2;
+    }
+
+    return total;
+}
diff --git a/src/caravan/core/training.cpp b/src/caravan/core/training.cpp
@@ -316,6 +316,74 @@ bool generate_play_numeral(std::string *input, Game *game, Player *player, uint8
     return true;
 }
 
+/**
+ * Generate input that plays a JACK from the player's hand onto one of the
+ * player's own bust caravans.
+ *
+ * Among the bust caravans (bid above CARAVAN_SOLD_MAX) that have at least one
+ * slot able to take another face card, the least bust one is chosen. Within
+ * that caravan, the JACK targets the slot whose removal leaves the largest
+ * bid.
+ *
+ * @param input Output: receives the generated input, only when true is
+ *              returned.
+ * @param game Game from which the player's caravans are read.
+ * @param player Player whose hand is searched for a JACK.
+ * @return true if an input was generated; false if the hand holds no JACK or
+ *         no bust caravan has a slot that a JACK can be played on.
+ */
+bool generate_play_jack_self(std::string *input, Game *game, Player *player) {
+    Hand hand = player->get_hand();
+    uint8_t hand_size = player->get_size_hand();
+
+    // 1-based hand position of a JACK (0 = none found; the last JACK wins)
+    uint8_t pos_jack = 0;
+
+    for (uint8_t i = 0; i < hand_size; i++) {
+        if (hand[i].rank == JACK) {
+            pos_jack = i + 1;
+        }
+    }
+
+    // Hand does not have any JACK cards
+    if (pos_jack == 0) return false;
+
+    PlayerCaravanNames cvn_names = game->get_player_caravan_names(
+        player->get_name());
+
+    // 1-based caravan number and slot position of the chosen target.
+    // 0 means "nothing chosen yet", so both must be stored 1-based.
+    uint8_t pos_bust = 0;
+    uint8_t pos_slot = 0;
+    uint16_t value_bust = 0;
+
+    for (uint8_t cvn_num = 1; cvn_num <= cvn_names.size(); cvn_num++) {
+        Caravan *cvn = game->get_table()->get_caravan(cvn_names[cvn_num - 1]);
+        uint16_t cvn_bid = cvn->get_bid();
+
+        // Only use JACK on a bust caravan
+        if (cvn_bid <= CARAVAN_SOLD_MAX) continue;
+
+        // Only a caravan less bust than the current choice can replace it
+        if (pos_bust != 0 and cvn_bid >= value_bust) continue;
+
+        // Best slot of THIS caravan only: slots of different caravans are
+        // never compared against each other (0 = no playable slot found).
+        uint8_t best_slot = 0;
+        uint16_t best_after = 0;
+
+        for (uint8_t pos = 1; pos <= cvn->get_size(); pos++) {
+            Slot slot = cvn->get_slot(pos);
+
+            // Slot is full, so cannot play a JACK
+            if (slot.i_faces == TRACK_FACE_MAX) continue;
+
+            // Caravan bid if card at slot were removed
+            uint16_t bid_after = cvn_bid - slot_value(slot);
+
+            // Remove card that leaves the largest bid. The first playable
+            // slot is always accepted, so a removal that leaves a bid of 0
+            // is still a valid choice.
+            // NOTE(review): the remaining bid may itself still be bust;
+            // confirm whether it should be capped at CARAVAN_SOLD_MAX.
+            if (best_slot == 0 or bid_after > best_after) {
+                best_slot = pos;
+                best_after = bid_after;
+            }
+        }
+
+        // Adopt this caravan only if a JACK can actually be played on it
+        if (best_slot != 0) {
+            pos_bust = cvn_num;
+            pos_slot = best_slot;
+            value_bust = cvn_bid;
+        }
+    }
+
+    // No bust caravans with free slots, do not play JACK on self
+    if (pos_bust == 0) return false;
+
+    *input =
+        "P" +
+        std::to_string(pos_jack) +
+        caravan_letter(cvn_names[pos_bust - 1]) +
+        std::to_string(pos_slot);
+
+    return true;
+}
+}
+
 bool generate_input(std::string *input, Action action, Game *game) {
     Player *player = game->get_player_turn();
 
@@ -350,7 +418,10 @@ bool generate_input(std::string *input, Action action, Game *game) {
             default:
                 return false;
         }
-    }  // TODO face cards
+
+    } else if (action == ACTION_PLAY_JACK_SELF) {
+        return generate_play_jack_self(input, game, player);
+    }
 
     return false;
 }
@@ -405,7 +476,7 @@ bool train_on_game(Game *game, QTable &q_table, ActionSpace &action_space,
 
         // Find a valid action
         while (true) {
-            if (!learning or explore or (!q_table.contains(gs))) {
+            if (!learning or explore or !q_table.contains(gs)) {
                 // If exploring, fetch a random action from the action pool
                 std::uniform_int_distribution<uint16_t> dist_pool(
                     0, action_pool.size() - 1);
@@ -418,25 +489,24 @@ bool train_on_game(Game *game, QTable &q_table, ActionSpace &action_space,
                 action_value = 0;
 
                 // Try all known actions first to see if any are above 0
-                for (auto it_q = q_table[gs].begin(); it_q != q_table[gs].end();
-                     it_q++) {
+                for (auto it_q = q_table[gs].begin(); it_q != q_table[gs].end(); it_q++) {
                     Action a = it_q->first;
 
-                    // Found an action explored in the past with a better-than-default value
                     if (q_table[gs][a] > action_value) {
                         // Find its index in action pool
-                        auto it_ap = std::find(action_pool.begin(),
-                                               action_pool.end(), a);
+                        auto it_ap = std::find(
+                            action_pool.begin(), action_pool.end(), a);
 
                         // Ignore if already removed from pool
                         if (it_ap == action_pool.end()) continue;
 
                         action_index = std::distance(action_pool.begin(), it_ap);
+
+                        // Found an action explored in the past with a better-than-default value
                         action_value = q_table[gs][action_pool[action_index]];
                     }
                 }
 
-                // If not, try the rest via explore
                 if (action_index == -1) {
                     explore = true;
                     continue;
@@ -466,21 +536,21 @@ bool train_on_game(Game *game, QTable &q_table, ActionSpace &action_space,
         // Clear pool for next time around
         action_pool.clear();
 
-
+        /*
         //if (action_value > 0)
             printf("[%s] %s (i=%hu, v=%.2f)\n",
                    pturn == PLAYER_ABC ? "ABC" : "DEF",
                    action_input.c_str(),
                    action_index,
                    action_value);
-
+        */
 
         // Perform action
         // (Exceptions intentionally not handled)
         game->play_option(&command);
 
         // Update q-table
-        if (learning and num_moves >= 2) {
+        if (num_moves >= 2) {
             PlayerName winner_name = game->get_winner_name();
 
             if (winner_name != NO_PLAYER) {
@@ -492,10 +562,11 @@ bool train_on_game(Game *game, QTable &q_table, ActionSpace &action_space,
                 }
             }
 
-            q_table[last_gs][last_action] =
-                q_table[last_gs][last_action] + tc.learning * (
-                    tc.discount * q_table[gs][action] -
-                    q_table[last_gs][last_action]);
+            if (learning)
+                q_table[last_gs][last_action] =
+                    q_table[last_gs][last_action] + tc.learning * (
+                        tc.discount * q_table[gs][action] -
+                        q_table[last_gs][last_action]);
         }
 
         if (learning) {

diff --git a/src/caravan/train.cpp b/src/caravan/train.cpp
@@ -27,20 +27,21 @@ int main(int argc, char *argv[]) {
     std::uniform_int_distribution<uint8_t> dist_first_player(
         NUM_PLAYER_ABC, NUM_PLAYER_DEF);
 
-    uint16_t checkpoint = 1;
+    uint16_t checkpoint = 1000;
     uint16_t num_wins = 0;
 
+    // Training parameters TODO user-defined arguments
+    float discount = 0.95;
+    float learning = 0.7;
+    uint32_t episode_max = 1000000;
+
     try {
         // Fill action space with all possible actions
         populate_action_space(&action_space);
 
-        // Training parameters TODO user-defined arguments
-        float discount = 0.95;
-        float learning = 0.7;
-        uint32_t episode_max = 2;
-
         // Game config uses largest deck with most samples and balance to
         // maximise chance of encountering every player hand combination.
+        // TODO random card and sample sizes
         gc = {
             .player_abc_cards = DECK_CARAVAN_MAX,
             .player_abc_samples = SAMPLE_DECKS_MAX,