diff --git a/python/splendor/base.py b/python/splendor/base.py
index 744e978..5aa7d6d 100644
--- a/python/splendor/base.py
+++ b/python/splendor/base.py
@@ -101,7 +101,7 @@ class PlayerState:
     def pay_for_card(self, card: Card) -> dict[GemColor, int]:
         """Pay tokens for card, move card to tableau, return payment for bank."""
         if not self.can_afford(card):
-            msg = f"{self.name} cannot afford card {card}"
+            msg = f"cannot afford card {card}"
             raise ValueError(msg)
 
         payment: dict[GemColor, int] = dict.fromkeys(GEM_COLORS, 0)
@@ -248,7 +248,6 @@ class BuyCard(Action):
 class BuyCardReserved(Action):
     """Buy a face-up card."""
 
-    tier: int
     index: int
 
 
@@ -472,9 +471,9 @@ def apply_action(game: GameState, strategy: Strategy, action: Action) -> None:
     action_func(game, strategy, action)
 
 
-def legal_actions(
+def get_legal_actions(
     game: GameState,
-    player_index: int | None = None,
+    player: PlayerState | None = None,
 ) -> list[Action]:
     """Enumerate all syntactically legal actions for the given player.
 
@@ -483,9 +482,8 @@ def legal_actions(
     - reserve limits
     - affordability for buys
     """
-    if player_index is None:
-        player_index = game.current_player_index
-    player = game.players[player_index]
+    if player is None:
+        player = game.players[game.current_player_index]
 
     actions: list[Action] = []
 
@@ -504,7 +502,7 @@ def legal_actions(
 
     for idx, card in enumerate(player.reserved):
         if player.can_afford(card):
-            actions.append(BuyCard(tier=0, index=idx, from_reserved=True))
+            actions.append(BuyCardReserved(index=idx))
 
     if len(player.reserved) < game.config.reserve_limit:
         for tier, row in game.table_by_tier.items():
diff --git a/python/splendor/bot.py b/python/splendor/bot.py
index c58ce56..ac1904a 100644
--- a/python/splendor/bot.py
+++ b/python/splendor/bot.py
@@ -8,6 +8,7 @@ from .base import (
     BASE_COLORS,
     Action,
     BuyCard,
+    BuyCardReserved,
     Card,
     GameState,
     GemColor,
@@ -17,6 +18,7 @@ from .base import (
     TakeDifferent,
     TakeDouble,
     auto_discard_tokens,
+    get_legal_actions,
 )
 
 
@@ -35,7 +37,7 @@ def can_bot_afford(player: PlayerState, card: Card) -> bool:
 class RandomBot(Strategy):
     """Dumb bot that follows rules but doesn't think."""
 
-    def __init__(self, name: str = "Bot") -> None:
+    def __init__(self, name: str) -> None:
         super().__init__(name=name)
 
     def choose_action(self, game: GameState, player: PlayerState) -> Action | None:
@@ -71,3 +73,193 @@ class RandomBot(Strategy):
         excess: int,
     ) -> dict[GemColor, int]:
         return auto_discard_tokens(player, excess)
+
+
+class PersonalizedBot(Strategy):
+    """Bot that buys a random affordable table card, else takes up to three different tokens."""
+
+    def __init__(self, name: str) -> None:
+        super().__init__(name=name)
+
+    def check_cards_in_tier(self, row: list[Card], player: PlayerState) -> list[int]:
+        """Return the indices of cards in row for which can_bot_afford(player, card) is true."""
+        return [index for index, card in enumerate(row) if can_bot_afford(player, card)]
+
+    def choose_action(self, game: GameState, player: PlayerState) -> Action | None:
+        for tier in (1, 2, 3):
+            row = game.table_by_tier[tier]
+            if affordable := check_cards_in_tier(row, player):
+                index = random.choice(affordable)
+                return BuyCard(tier=tier, index=index)
+
+        colors_for_diff = [c for c in BASE_COLORS if game.bank[c] > 0]
+        random.shuffle(colors_for_diff)
+        return TakeDifferent(colors=colors_for_diff[:3])
+
+    def choose_discard(
+        self,
+        game: GameState,
+        player: PlayerState,
+        excess: int,
+    ) -> dict[GemColor, int]:
+        return auto_discard_tokens(player, excess)
+
+
+def check_cards_in_tier(row: list[Card], player: PlayerState) -> list[int]:
+    """Return the indices of cards in row for which can_bot_afford(player, card) is true."""
+    return [index for index, card in enumerate(row) if can_bot_afford(player, card)]
+
+
+class PersonalizedBot2(Strategy):
+    """Bot that buys from the table, then from its reserve, then takes tokens or reserves from a deck."""
+
+    def __init__(self, name: str) -> None:
+        super().__init__(name=name)
+
+    def choose_action(self, game: GameState, player: PlayerState) -> Action | None:
+        tiers = (1, 2, 3)
+        for tier in tiers:
+            row = game.table_by_tier[tier]
+            if affordable := check_cards_in_tier(row, player):
+                index = random.choice(affordable)
+                return BuyCard(tier=tier, index=index)
+
+        if affordable := check_cards_in_tier(player.reserved, player):
+            index = random.choice(affordable)
+            return BuyCardReserved(index=index)
+
+        colors_for_diff = [c for c in BASE_COLORS if game.bank[c] > 0]
+        if len(colors_for_diff) >= 3:
+            random.shuffle(colors_for_diff)
+            return TakeDifferent(colors=colors_for_diff[:3])
+
+        for tier in tiers:
+            len_deck = len(game.decks_by_tier[tier])
+            if len_deck:
+                return ReserveCard(tier=tier, index=None, from_deck=True)
+
+        return TakeDifferent(colors=colors_for_diff[:3])
+
+    def choose_discard(
+        self,
+        game: GameState,
+        player: PlayerState,
+        excess: int,
+    ) -> dict[GemColor, int]:
+        return auto_discard_tokens(player, excess)
+
+
+def buy_card_reserved(player: PlayerState) -> Action | None:
+    if affordable := check_cards_in_tier(player.reserved, player):
+        index = random.choice(affordable)
+        return BuyCardReserved(index=index)
+    return None
+
+
+def buy_card(game: GameState, player: PlayerState) -> Action | None:
+    for tier in (1, 2, 3):
+        row = game.table_by_tier[tier]
+        if affordable := check_cards_in_tier(row, player):
+            index = random.choice(affordable)
+            return BuyCard(tier=tier, index=index)
+    return None
+
+
+def take_toekns(game: GameState) -> Action | None:
+    colors_for_diff = [color for color in BASE_COLORS if game.bank[color] > 0]
+    if len(colors_for_diff) >= 3:
+        random.shuffle(colors_for_diff)
+        return TakeDifferent(colors=colors_for_diff[: game.config.max_token_take])
+    return None
+
+
+class PersonalizedBot3(Strategy):
+    """Bot that buys from its reserve, then from the table, then takes tokens or reserves from a deck."""
+
+    def __init__(self, name: str) -> None:
+        super().__init__(name=name)
+
+    def choose_action(self, game: GameState, player: PlayerState) -> Action | None:
+        print(len(get_legal_actions(game, player)))
+        print(get_legal_actions(game, player))
+        if action := buy_card_reserved(player):
+            return action
+        if action := buy_card(game, player):
+            return action
+
+        colors_for_diff = [color for color in BASE_COLORS if game.bank[color] > 0]
+        if len(colors_for_diff) >= 3:
+            random.shuffle(colors_for_diff)
+            return TakeDifferent(colors=colors_for_diff[:3])
+
+        for tier in (1, 2, 3):
+            len_deck = len(game.decks_by_tier[tier])
+            if len_deck:
+                return ReserveCard(tier=tier, index=None, from_deck=True)
+
+        return TakeDifferent(colors=colors_for_diff[:3])
+
+    def choose_discard(
+        self,
+        game: GameState,
+        player: PlayerState,
+        excess: int,
+    ) -> dict[GemColor, int]:
+        return auto_discard_tokens(player, excess)
+
+
+def estimate_value_of_card(game: GameState, player: PlayerState, color: GemColor) -> int:
+    """Estimate value of a color in the player's bank."""
+    return game.bank[color] - player.discounts.get(color, 0)
+
+
+def estimate_value_of_token(game: GameState, player: PlayerState, color: GemColor) -> int:
+    """Estimate value of a color in the player's bank."""
+    return game.bank[color] - player.discounts.get(color, 0)
+
+
+class PersonalizedBot4(Strategy):
+    def __init__(self, name: str) -> None:
+        super().__init__(name=name)
+
+    def filter_actions(self, actions: list[Action]) -> list[Action]:
+        return [
+            action
+            for action in actions
+            if not isinstance(action, TakeDifferent) or len(action.colors) == 3
+        ]
+
+    def choose_action(self, game: GameState, player: PlayerState) -> Action | None:
+        legal_actions = get_legal_actions(game, player)
+        print(len(legal_actions))
+
+        good_actions = self.filter_actions(legal_actions)
+        print(len(good_actions))
+
+        print(good_actions)
+
+        print(len(get_legal_actions(game, player)))
+        if action := buy_card_reserved(player):
+            return action
+        if action := buy_card(game, player):
+            return action
+
+        colors_for_diff = [color for color in BASE_COLORS if game.bank[color] > 0]
+        if len(colors_for_diff) >= 3:
+            random.shuffle(colors_for_diff)
+            return TakeDifferent(colors=colors_for_diff[:3])
+
+        for tier in (1, 2, 3):
+            len_deck = len(game.decks_by_tier[tier])
+            if len_deck:
+                return ReserveCard(tier=tier, index=None, from_deck=True)
+
+        return TakeDifferent(colors=colors_for_diff[:3])
+
+    def choose_discard(
+        self,
+        game: GameState,
+        player: PlayerState,
+        excess: int,
+    ) -> dict[GemColor, int]:
+        return auto_discard_tokens(player, excess)
diff --git a/python/splendor/simulat.py b/python/splendor/simulat.py
index 846b5c1..db60853 100644
--- a/python/splendor/simulat.py
+++ b/python/splendor/simulat.py
@@ -4,17 +4,18 @@ from __future__ import annotations
 
 from collections import defaultdict
 from pathlib import Path
+from statistics import mean
 
 from .base import GameConfig, load_cards, load_nobles, new_game, run_game
-from .bot import RandomBot
+from .bot import PersonalizedBot, PersonalizedBot4, PersonalizedBot3, RandomBot
 
 
 def main() -> None:
     """Main entry point."""
     turn_limit = 1000
     good_games = 0
-    games = 10000
-    winners: dict[str, int] = defaultdict(int)
+    games = 1
+    winners: dict[str, list[int]] = defaultdict(list)
 
     game_data = Path(__file__).parent / "game_data"
     cards = load_cards(game_data / "cards/default.json")
@@ -24,7 +25,7 @@ def main() -> None:
         bot_a = RandomBot("bot_a")
         bot_b = RandomBot("bot_b")
         bot_c = RandomBot("bot_c")
-        bot_d = RandomBot("bot_d")
+        bot_d = PersonalizedBot4("my_bot")
         config = GameConfig(
             cards=cards,
             nobles=nobles,
@@ -35,11 +36,13 @@ def main() -> None:
         winner, turns = run_game(game_state)
         if turns < turn_limit:
             good_games += 1
-            winners[winner.strategy.name] += 1
+            winners[winner.strategy.name].append(turns)
 
-    print(f"out of {games} {turn_limit} turn games with 4 random bots there where {good_games} games where a bot won")
-    for k, v in winners.items():
-        print(f"{k} won {v}")
+    print(
+        f"out of {games} {turn_limit} turn games with {len(players)} random bots there where {good_games} games where a bot won"
+    )
+    for name, turns in winners.items():
+        print(f"{name} won {len(turns)} games in {mean(turns):.2f} turns")
 
 
 if __name__ == "__main__":