SinglePlayer Tournament

Overview

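In SinglePlayer mode, a single agent plays against a mirror copy of itself: after each round, the mirror is reset to the agent's code from the previous round.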

Tournament Format

Initialization: Agent starts with clean codebase
Round Loop: For each round:
Agent updates code
Challenge/benchmark is executed
Performance metrics recorded
Evaluation: Agent scored on cumulative performance

Configuration

game:
  name: HuskyBench
  rounds: 10


player:
  name: SoloAgent
  model: gpt-4-turbo
  temperature: 0.7

Running a SinglePlayer Tournament

python main_single_player.py configs/examples/battlesnake_single_player.yaml

Implementation

codeclash.tournaments.single_player.SinglePlayerTraining

SinglePlayerTraining(config: dict, *, output_dir: Path, cleanup: bool = False, keep_containers: bool = False)

Bases: AbstractTournament

Source code in codeclash/tournaments/single_player.py

def __init__(self, config: dict, *, output_dir: Path, cleanup: bool = False, keep_containers: bool = False):
    """Set up the arena, the main agent, and a mirror copy of the agent.

    Args:
        config: Tournament configuration; its ``"player"`` entry configures
            both the main agent and (after renaming) the mirror agent.
        output_dir: Forwarded to the base tournament initializer.
        cleanup: Stored as ``cleanup_on_end`` and handed to the arena when
            the tournament ends.
        keep_containers: Forwarded to ``get_arena``.
    """
    super().__init__(config, name="SinglePlayerTraining", output_dir=output_dir)
    self.cleanup_on_end = cleanup

    # The arena must exist before any agent is built: get_agent() asks it
    # for an environment and reads its game id.
    arena_options = {
        "tournament_id": self.tournament_id,
        "local_output_dir": self.local_output_dir,
        "keep_containers": keep_containers,
    }
    self.game: CodeArena = get_arena(self.config, **arena_options)

    self.agent: Player = self.get_agent(self.config["player"], round=1)

    # The mirror works on an independent copy of the player config, so
    # renaming it cannot leak into the main agent's configuration.
    mirror_agent_config = copy.deepcopy(self.config["player"])
    mirror_agent_config.update(name="mirror")
    self.mirror_agent: Player = self.get_agent(mirror_agent_config, round=0)

cleanup_on_end `instance-attribute`

cleanup_on_end = cleanup

game `instance-attribute`

game: CodeArena = get_arena(config, tournament_id=tournament_id, local_output_dir=local_output_dir, keep_containers=keep_containers)

agent `instance-attribute`

agent: Player = get_agent(config['player'], round=1)

mirror_agent `instance-attribute`

mirror_agent: Player = get_agent(mirror_agent_config, round=0)

rounds `property`

rounds: int

get_metadata

get_metadata() -> dict

Source code in codeclash/tournaments/single_player.py

def get_metadata(self) -> dict:
    """Return the base tournament metadata extended with game and agent metadata.

    The ``"agents"`` entry lists the main agent first, then the mirror agent.
    """
    metadata = dict(super().get_metadata())
    metadata["game"] = self.game.get_metadata()
    metadata["agents"] = [player.get_metadata() for player in (self.agent, self.mirror_agent)]
    return metadata

get_game_context

get_game_context(agent_config: dict, *, round: int) -> GameContext

Create a game context for an agent.

Source code in codeclash/tournaments/single_player.py

def get_game_context(self, agent_config: dict, *, round: int) -> GameContext:
    """Build the ``GameContext`` for the agent described by ``agent_config``.

    Args:
        agent_config: Agent configuration; its ``"name"`` becomes the player id.
        round: Round number recorded in the context.

    Returns:
        A context combining arena details, the configured prompts, the total
        number of rounds and the working directory.
    """
    arena = self.game
    context_fields = {
        "id": arena.game_id,
        "log_env": arena.log_env,
        "log_local": arena.log_local,
        "name": arena.name,
        "player_id": agent_config["name"],
        "prompts": self.config["prompts"],
        "round": round,
        "rounds": self.rounds,
        "working_dir": str(DIR_WORK),
    }
    return GameContext(**context_fields)

get_agent

get_agent(agent_config: dict, round: int) -> Player

Create an agent with environment and game context.

Source code in codeclash/tournaments/single_player.py

def get_agent(self, agent_config: dict, round: int) -> Player:
    """Build a player from ``agent_config`` with its own environment and game context.

    Args:
        agent_config: Agent configuration; its ``"name"`` is used in the
            environment identifier and as the player id of the context.
        round: Round number stored in the agent's game context.
    """
    # Environment identifier is "<game id>.<agent name>".
    env_name = f"{self.game.game_id}.{agent_config['name']}"
    agent_environment = self.game.get_environment(env_name)
    agent_context = self.get_game_context(agent_config, round=round)
    # Delegates to the module-level get_agent factory (not this method).
    return get_agent(agent_config, agent_context, agent_environment)

get_dummy_agent

get_dummy_agent(player_config: dict) -> Player

Create a dummy agent that does nothing.

Source code in codeclash/tournaments/single_player.py

def get_dummy_agent(self, player_config: dict) -> Player:
    """Build a ``Dummy`` player for ``player_config`` with a round-0 game context.

    NOTE(review): every dummy agent requests the environment identifier
    "<game id>.dummy" regardless of its name — confirm that is intended when
    several dummies exist at once.
    """
    dummy_environment = self.game.get_environment(f"{self.game.game_id}.dummy")
    dummy_context = self.get_game_context(player_config, round=0)
    return Dummy(player_config, environment=dummy_environment, game_context=dummy_context)

run

run()

Main execution function that runs all rounds.

Source code in codeclash/tournaments/single_player.py

def run(self):
    """Run every training round, then the optional matrix evaluation.

    ``end()`` is always called, even when a round or the evaluation raises.
    """
    try:
        final_round = self.rounds
        for current_round in range(1, final_round + 1):
            self.run_training_round(current_round)

        wants_matrix = self.config["tournament"]["evaluate_matrix"]
        if wants_matrix:
            self.evaluate()
    finally:
        self.end()

run_training_round

run_training_round(round_num: int) -> None

Execute a single training round, i.e., run the game, then run the agent.

Source code in codeclash/tournaments/single_player.py

def run_training_round(self, round_num: int) -> None:
    """Play one game round, then let the main agent update its code.

    Steps, in order: run the game between the main agent and the mirror,
    record and persist the round statistics, copy the round's logs into the
    main agent's environment, run the main agent, reset the mirror to the
    agent's state from the previous round, and save the tournament state.
    """
    # Play the round and keep its statistics in the tournament metadata.
    round_stats = self.game.run_round([self.agent, self.mirror_agent], round_num)
    self.logger.info(round_stats)
    stats_for_metadata = round_stats.to_dict()
    self._metadata.setdefault("round_stats", {})[round_num] = stats_for_metadata

    # Persist the statistics next to the round's logs.
    round_log_dir = self.game.log_local / "rounds" / str(round_num)
    (round_log_dir / FILE_RESULTS).write_text(json.dumps(round_stats.to_dict(), indent=2))

    # Only the main agent receives the logs; the mirror never runs an agent loop here.
    self.logger.info(f"Copying round {round_num} log(s) to {self.agent.name}'s container...")
    copy_to_container(
        self.agent.environment,
        self.game.log_local / "rounds" / str(round_num),
        f"logs/rounds/{round_num}/",
    )

    self.run_main_agent(round_num)

    # The mirror trails the main agent by one round (never below round 0).
    previous_round = max(round_num - 1, 0)
    self.set_mirror_state_to_round(previous_round)

    self._save()

    self.logger.info("Round completed.")

run_main_agent

run_main_agent(round_num: int)

Run the main agent for the current round.

Source code in codeclash/tournaments/single_player.py

def run_main_agent(self, round_num: int):
    """Run the main agent for ``round_num``, wrapped in its pre- and post-run hooks."""
    main_agent = self.agent
    main_agent.pre_run_hook(new_round=round_num)
    main_agent.run()
    main_agent.post_run_hook(round=round_num)

set_mirror_state_to_round

set_mirror_state_to_round(round_num: int)

Update mirror agent's codebase with the main agent's changes.

Source code in codeclash/tournaments/single_player.py

def set_mirror_state_to_round(self, round_num: int):
    """Reset the mirror agent and apply the main agent's filtered diff for ``round_num``."""
    agent_diff = self._get_round_diff(self.agent.name, round_num)
    # The diff is passed through filter_git_diff before being applied to the mirror.
    self.mirror_agent.reset_and_apply_patch(filter_git_diff(agent_diff))

end

end()

Clean up game resources.

Source code in codeclash/tournaments/single_player.py

def end(self):
    """Save the tournament state, then end the game, passing on the cleanup flag."""
    self._save()
    arena, cleanup_flag = self.game, self.cleanup_on_end
    arena.end(cleanup_flag)

evaluate

evaluate(n_repetitions: int = 3) -> None

Evaluate the agent's performance by calculating the matrix of every round against each other.

Source code in codeclash/tournaments/single_player.py

def evaluate(self, n_repetitions: int = 3) -> None:
    """Play every round's agent version against every other round's version.

    Two dummy players ("p1" and "p2") are reset to the main agent's diff for
    each pair of rounds ``0..self.rounds`` and play ``n_repetitions`` games.
    The winners are stored under ``self._metadata["evaluation"]["matrix"]`` as
    ``matrix[p1_round][p2_round] -> list of winners``.

    Args:
        n_repetitions: Number of games played for each (p1_round, p2_round) pair.
    """
    p1_config = self.config["player"].copy()
    p1_config["name"] = "p1"
    p1 = self.get_dummy_agent(p1_config)

    p2_config = self.config["player"].copy()
    p2_config["name"] = "p2"
    p2 = self.get_dummy_agent(p2_config)

    round_ids = range(self.rounds + 1)

    # Fetch each round's diff once instead of on every inner iteration.
    # NOTE(review): unlike set_mirror_state_to_round, these diffs are not
    # passed through filter_git_diff — confirm whether that is intended.
    patches = {round_id: self._get_round_diff(self.agent.name, round_id) for round_id in round_ids}

    # Evaluation games need round numbers that are unique and distinct from
    # the training rounds 1..self.rounds. Plain concatenation of the three
    # numbers is ambiguous (e.g. "0"+"10"+"0" and "1"+"0"+"0" are both 100),
    # so each field is zero-padded to a fixed width and prefixed with a "1"
    # sentinel that preserves leading zeros: 1 | p1_round | p2_round | repetition.
    round_width = len(str(self.rounds))
    repetition_width = len(str(max(n_repetitions - 1, 0)))

    matrix = {p1_round: {p2_round: [] for p2_round in round_ids} for p1_round in round_ids}
    for p1_round in round_ids:
        for p2_round in round_ids:
            self.logger.info(f"Evaluating agent at round {p1_round} against agent at round {p2_round}")
            p1.reset_and_apply_patch(patches[p1_round])
            p2.reset_and_apply_patch(patches[p2_round])
            for i_repetition in range(n_repetitions):
                evaluation_round_num = int(
                    f"1{p1_round:0{round_width}d}{p2_round:0{round_width}d}{i_repetition:0{repetition_width}d}"
                )
                stats = self.game.run_round([p1, p2], round_num=evaluation_round_num)
                self.logger.info(f"Round {p1_round} vs {p2_round} repetition {i_repetition} winner: {stats.winner}")
                matrix[p1_round][p2_round].append(stats.winner)
    self.logger.info(f"Evaluation matrix: {matrix}")
    self._metadata.setdefault("evaluation", {})["matrix"] = matrix