Skip to content

PvP Tournament

Player-versus-player tournament where multiple agents compete head-to-head.

Overview

PvP tournaments pit multiple agents against each other in competitive matches. Agents earn points by winning rounds, and the agent with the most cumulative points wins the tournament.

Tournament Format

  1. Initialization: All agents start with clean codebases
  2. Round Loop: For each round:
     - All agents update their code based on previous results
     - Game is executed with all agents
     - Points awarded based on performance
  3. Winner: Agent with highest cumulative score

Configuration

game:
  name: BattleCode
  rounds: 15
  sims_per_round: 3
  timeout: 600

players:
  - name: Agent1
    model: gpt-4-turbo
    temperature: 0.7

  - name: Agent2
    model: claude-3-opus
    temperature: 0.7

  - name: Agent3
    model: gpt-4
    temperature: 0.5

tournament:
  keep_containers: false
  push_to_remote: false

Implementation

codeclash.tournaments.pvp.PvpTournament

PvpTournament(config: dict, *, output_dir: Path, cleanup: bool = False, push: bool = False, keep_containers: bool = False)

Bases: AbstractTournament

Source code in codeclash/tournaments/pvp.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    config: dict,
    *,
    output_dir: Path,
    cleanup: bool = False,
    push: bool = False,
    keep_containers: bool = False,
):
    """Set up the game arena and one agent per configured player.

    Args:
        config: Tournament configuration. The ``"players"`` and ``"prompts"``
            entries of ``self.config`` are read here.
        output_dir: Forwarded to the base tournament initializer.
        cleanup: Stored as ``cleanup_on_end`` and handed to the game in ``end()``.
        push: Forwarded to every agent created via ``get_agent``.
        keep_containers: Forwarded to ``get_game``.

    Raises:
        FileExistsError: If this tournament's metadata file already exists.
    """
    super().__init__(config, name="PvpTournament", output_dir=output_dir)

    # Refuse to start when a metadata file is already present.
    metadata_path = self.metadata_file
    if metadata_path.exists():
        message = f"Metadata file already exists: {metadata_path}"
        self.logger.critical(message)
        raise FileExistsError(message)

    self.cleanup_on_end = cleanup
    self.game: CodeArena = get_game(
        self.config,
        tournament_id=self.tournament_id,
        local_output_dir=self.local_output_dir,
        keep_containers=keep_containers,
    )

    # Agents are created after the game because get_agent reads self.game.
    self.agents: list[Player] = []
    self.agents.extend(
        self.get_agent(player_conf, self.config["prompts"], push=push)
        for player_conf in self.config["players"]
    )

cleanup_on_end instance-attribute

cleanup_on_end = cleanup

game instance-attribute

game: CodeArena = get_game(config, tournament_id=tournament_id, local_output_dir=local_output_dir, keep_containers=keep_containers)

agents instance-attribute

agents: list[Player] = []

metadata_file property

metadata_file: Path

rounds property

rounds: int

transparent property

transparent: bool

get_metadata

get_metadata() -> dict
Source code in codeclash/tournaments/pvp.py
60
61
62
63
64
65
66
def get_metadata(self) -> dict:
    """Return the base tournament metadata extended with game and agent metadata."""
    # will be saved in end()
    metadata = dict(super().get_metadata())
    metadata["game"] = self.game.get_metadata()
    metadata["agents"] = [participant.get_metadata() for participant in self.agents]
    return metadata

get_agent

get_agent(agent_config: dict, prompts: dict, push: bool) -> Player

Create an agent with environment and game context.

Source code in codeclash/tournaments/pvp.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def get_agent(self, agent_config: dict, prompts: dict, push: bool) -> Player:
    """Build one player from its config, with its own environment and game context.

    Args:
        agent_config: Player configuration; its ``"name"`` entry is used for the
            environment name and as the context's ``player_id``.
        prompts: Passed through to the ``GameContext``.
        push: Forwarded to the module-level ``get_agent`` factory.

    Returns:
        The player produced by the module-level ``get_agent`` factory.
    """
    arena = self.game

    # The environment is requested under the name "<game_id>.<player name>".
    environment_name = f"{arena.game_id}.{agent_config['name']}"
    environment = arena.get_environment(environment_name)

    context_fields = {
        "id": arena.game_id,
        "log_env": arena.log_env,
        "log_local": arena.log_local,
        "name": arena.name,
        "player_id": agent_config["name"],
        "prompts": prompts,
        "round": 1,
        "rounds": self.rounds,
        "working_dir": str(DIR_WORK),
    }
    game_context = GameContext(**context_fields)

    return get_agent(agent_config, game_context, environment, push=push)

run

run() -> None

Main execution function that runs all rounds.

Source code in codeclash/tournaments/pvp.py
86
87
88
89
90
91
92
93
94
95
96
97
def run(self) -> None:
    """Play the full tournament: a round-0 competition, then edit/compete rounds.

    ``end()`` is always called afterwards, even if a phase raises.
    """
    try:
        # Round 0 is played before any agent has had an edit phase.
        self.run_competition_phase(0)

        for round_num in range(1, self.rounds + 1):
            # Every numbered round is an edit phase followed by a competition.
            for phase in (self.run_edit_phase, self.run_competition_phase):
                phase(round_num)

        # run_edit_phase only ever compresses the *previous* round's folder,
        # so the final round has to be compressed explicitly here.
        self._compress_round_folder(self.rounds)
    finally:
        self.end()

run_competition_phase

run_competition_phase(round_num: int) -> None
Source code in codeclash/tournaments/pvp.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def run_competition_phase(self, round_num: int) -> None:
    """Play one game round, record its stats in the metadata and write them to disk.

    Args:
        round_num: Round to play; also the key under ``"round_stats"`` and the
            name of the round's log directory.
    """
    # Play the round with all agents and log the outcome.
    round_stats = self.game.run_round(self.agents, round_num)
    self.logger.info(round_stats)

    stats_by_round = self._metadata.setdefault("round_stats", {})
    stats_by_round[round_num] = round_stats.to_dict()

    # Make sure <log_local>/rounds/<round_num>/ exists before writing into it.
    round_dir = self.game.log_local / "rounds" / str(round_num)
    round_dir.mkdir(parents=True, exist_ok=True)

    # Dump the round's stats as indented JSON into the results file.
    serialized = json.dumps(round_stats.to_dict(), indent=2)
    (round_dir / FILE_RESULTS).write_text(serialized)

    self._save()

run_edit_phase

run_edit_phase(round_num: int) -> None

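Run the edit phase for a round: copy the previous round's logs into every agent's container, optionally (in transparent mode) share each agent's codebase with its opponents, then let all agents update their code concurrently.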

Source code in codeclash/tournaments/pvp.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def run_edit_phase(self, round_num: int) -> None:
    """Let every agent revise its codebase ahead of round ``round_num``.

    In order: copy the previous round's local log folder into each agent's
    container, compress that folder, copy each agent's working directory into
    every opponent's container when transparent mode is on, run all agents
    concurrently, and finally save the tournament state.

    Args:
        round_num: The round the agents are preparing for; logs are taken from
            round ``round_num - 1``.

    Raises:
        Exception: The first agent failure encountered, re-raised after logging.
    """
    previous_round = round_num - 1
    previous_dir = str(previous_round)

    # Hand each agent the logs of the round that was just played.
    for agent in self.agents:
        self.logger.info(f"Copying round {previous_round} log(s) to {agent.name}'s container...")
        copy_to_container(
            agent.environment,
            self.game.log_local / "rounds" / previous_dir,
            DIR_LOGS / "rounds" / previous_dir,
        )
    self._compress_round_folder(previous_round)

    if self.transparent:
        # Every agent's codebase is copied into each of its opponents' containers.
        self.logger.info("Transparent mode enabled: copying codebases between agents...")
        for position, owner in enumerate(self.agents):
            # Everyone except the agent at this position.
            recipients = self.agents[:position] + self.agents[position + 1 :]
            self.logger.info(f"Copying {owner.name}'s codebase to other agents...")
            for recipient in recipients:
                copy_between_containers(
                    owner.environment,
                    recipient.environment,
                    owner.environment.config.cwd,
                    f"/{OPPONENT_CODEBASES_DIR_NAME}/{owner.name}/",
                )

    # Agents run concurrently; results are collected in submission order.
    with ThreadPoolExecutor() as pool:
        pending = [pool.submit(self.run_agent, agent, round_num) for agent in self.agents]
        for task in pending:
            try:
                task.result()
            except Exception as exc:
                self.logger.critical(f"Agent execution failed: {exc}", exc_info=True)
                raise

    self._save()
    self.logger.info("Round completed.")

run_agent

run_agent(agent: Player, round_num: int) -> None

Run a single agent for the current round.

Source code in codeclash/tournaments/pvp.py
154
155
156
157
158
def run_agent(self, agent: Player, round_num: int) -> None:
    """Run one agent's turn for ``round_num``, wrapped in its pre- and post-run hooks.

    Args:
        agent: The player to run.
        round_num: Passed to the pre-run hook as ``new_round`` and to the
            post-run hook as ``round``.
    """
    # Pre-run hook is told which round is starting.
    agent.pre_run_hook(new_round=round_num)

    # The agent's own run step.
    agent.run()

    # Post-run hook is told which round just finished.
    agent.post_run_hook(round=round_num)

end

end() -> None

Save output files, clean up game resources and push agents if requested.

Source code in codeclash/tournaments/pvp.py
212
213
214
215
def end(self) -> None:
    """Save the tournament state, then end the game with the stored cleanup flag."""
    # Persist before shutting the game down.
    self._save()

    should_cleanup = self.cleanup_on_end
    self.game.end(should_cleanup)