mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 05:33:18 +08:00
[RLlib] Add MultiAgentEnv wrapper for Kaggle's football environment (#11249)
* [RLlib] Add MultiAgentEnv wrapper for Kaggle's football environment * Add unit tests to BUILD * Add gfootball dependency * Revert the last two commits
This commit is contained in:
Vendored
+155
@@ -0,0 +1,155 @@
|
||||
"""Wrap Kaggle's environment
|
||||
|
||||
Source: https://github.com/Kaggle/kaggle-environments
|
||||
"""
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
import kaggle_environments
|
||||
import numpy as np
|
||||
from gym.spaces import Box
|
||||
from gym.spaces import Dict as DictSpace
|
||||
from gym.spaces import Discrete, MultiBinary, MultiDiscrete, Space
|
||||
from gym.spaces import Tuple as TupleSpace
|
||||
|
||||
from ray.rllib.env import MultiAgentEnv
|
||||
from ray.rllib.utils.typing import MultiAgentDict, AgentID
|
||||
|
||||
|
||||
class KaggleFootballMultiAgentEnv(MultiAgentEnv):
    """An interface to Kaggle's football environment.

    The agent occupying slot ``idx`` of the Kaggle environment state is
    exposed to RLlib under the agent id ``f"agent{idx}"``.

    See: https://github.com/Kaggle/kaggle-environments
    """

    def __init__(self, configuration: Optional[Dict[str, Any]] = None) -> None:
        """Initializes a Kaggle football environment.

        Args:
            configuration (Optional[Dict[str, Any]]): configuration of the
                football environment. For detailed information, see:
                https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/football.json
        """  # noqa: E501
        # Let the base class perform whatever set-up it defines.
        super().__init__()
        self.kaggle_env = kaggle_environments.make(
            "football", configuration=configuration or {})
        # Rewards seen at the previous step, keyed by agent id. ``None``
        # means no step has been taken since the last reset.
        self.last_cumulative_reward = None

    def reset(self) -> MultiAgentDict:
        """Resets the underlying Kaggle environment.

        Returns:
            MultiAgentDict: converted observations of every agent whose
                status is "ACTIVE", keyed by agent id.
        """
        kaggle_state = self.kaggle_env.reset()
        self.last_cumulative_reward = None
        return {
            f"agent{idx}": self._convert_obs(agent_state["observation"])
            for idx, agent_state in enumerate(kaggle_state)
            if agent_state["status"] == "ACTIVE"
        }

    def step(
        self, action_dict: Dict[AgentID, int]
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
        """Advances the environment by one step.

        Args:
            action_dict (Dict[AgentID, int]): one action per agent whose
                status is currently "ACTIVE", keyed by agent id.

        Returns:
            Tuple of (obs, reward, done, info) dicts keyed by agent id.
            ``obs`` only has entries for agents that are still "ACTIVE";
            ``done`` additionally carries the "__all__" key.

        Raises:
            KeyError: if ``action_dict`` lacks an action for an active agent.
        """
        # Convert action_dict (used by RLlib) to a list of actions (used by
        # kaggle_environments). Slots of non-active agents stay None.
        action_list = [
            [action_dict[f"agent{idx}"]]
            if agent_state["status"] == "ACTIVE" else None
            for idx, agent_state in enumerate(self.kaggle_env.state)
        ]
        self.kaggle_env.step(action_list)

        # Parse (obs, reward, done, info) from kaggle's "state" representation
        obs = {}
        cumulative_reward = {}
        done = {"__all__": self.kaggle_env.done}
        info = {}
        for idx, agent_state in enumerate(self.kaggle_env.state):
            agent_name = f"agent{idx}"
            is_active = agent_state["status"] == "ACTIVE"
            # Only active agents receive a new observation; reward, done and
            # info are reported for every agent.
            if is_active:
                obs[agent_name] = self._convert_obs(agent_state["observation"])
            cumulative_reward[agent_name] = agent_state["reward"]
            done[agent_name] = not is_active
            info[agent_name] = agent_state["info"]

        # Compute the step rewards from the cumulative rewards
        previous_reward = self.last_cumulative_reward
        if previous_reward is None:
            # First step after a reset. Hand out a copy so that a caller
            # mutating the returned dict cannot corrupt the bookkeeping
            # stored in ``self.last_cumulative_reward``.
            reward = dict(cumulative_reward)
        else:
            reward = {
                agent_id: agent_reward - previous_reward[agent_id]
                for agent_id, agent_reward in cumulative_reward.items()
            }
        self.last_cumulative_reward = cumulative_reward
        return obs, reward, done, info

    def _convert_obs(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        """Convert raw observations

        These conversions are necessary to make the observations fall into the
        observation space defined below.

        Args:
            obs (Dict[str, Any]): raw observation of a single agent. It is
                not modified; a deep copy is converted instead.

        Returns:
            Dict[str, Any]: the converted copy of ``obs``.
        """
        new_obs = deepcopy(obs)
        player_obs = new_obs["players_raw"][0]
        # -1 cannot be represented by the Discrete spaces built in
        # build_agent_spaces(), so it is remapped to the extra last index of
        # Discrete(3) and Discrete(num_players + 1) respectively.
        if player_obs["ball_owned_team"] == -1:
            player_obs["ball_owned_team"] = 2
        if player_obs["ball_owned_player"] == -1:
            player_obs["ball_owned_player"] = 11
        # The space of "steps_left" is a Box of shape (1, ), hence the value
        # is wrapped in a one-element list.
        player_obs["steps_left"] = [player_obs["steps_left"]]
        return new_obs

    def build_agent_spaces(self) -> Tuple[Space, Space]:
        """Construct the action and observation spaces

        Description of actions and observations:
        https://github.com/google-research/football/blob/master/gfootball/doc/observation.md

        Returns:
            Tuple[Space, Space]: the pair (action_space, observation_space)
                shared by all agents.
        """  # noqa: E501
        action_space = Discrete(19)
        # The football field's corners are [+-1., +-0.42]. However, the players
        # and balls may get out of the field. Thus we multiply those limits by
        # a factor of 2.
        xlim = 1. * 2
        ylim = 0.42 * 2
        num_players: int = 11
        xy_space = Box(
            np.array([-xlim, -ylim], dtype=np.float32),
            np.array([xlim, ylim], dtype=np.float32))
        xyz_space = Box(
            np.array([-xlim, -ylim, 0], dtype=np.float32),
            np.array([xlim, ylim, np.inf], dtype=np.float32))
        observation_space = DictSpace({
            "controlled_players": Discrete(2),
            "players_raw": TupleSpace([
                DictSpace({
                    # ball information
                    "ball": xyz_space,
                    "ball_direction": Box(-np.inf, np.inf, (3, )),
                    "ball_rotation": Box(-np.inf, np.inf, (3, )),
                    "ball_owned_team": Discrete(3),
                    "ball_owned_player": Discrete(num_players + 1),
                    # left team
                    "left_team": TupleSpace([xy_space] * num_players),
                    "left_team_direction": TupleSpace(
                        [xy_space] * num_players),
                    "left_team_tired_factor": Box(0., 1., (num_players, )),
                    "left_team_yellow_card": MultiBinary(num_players),
                    "left_team_active": MultiBinary(num_players),
                    "left_team_roles": MultiDiscrete([10] * num_players),
                    # right team
                    "right_team": TupleSpace([xy_space] * num_players),
                    "right_team_direction": TupleSpace(
                        [xy_space] * num_players),
                    "right_team_tired_factor": Box(0., 1., (num_players, )),
                    "right_team_yellow_card": MultiBinary(num_players),
                    "right_team_active": MultiBinary(num_players),
                    "right_team_roles": MultiDiscrete([10] * num_players),
                    # controlled player information
                    "active": Discrete(num_players),
                    "designated": Discrete(num_players),
                    "sticky_actions": MultiBinary(10),
                    # match state
                    "score": Box(-np.inf, np.inf, (2, )),
                    "steps_left": Box(0, np.inf, (1, )),
                    "game_mode": Discrete(7)
                })
            ])
        })
        return action_space, observation_space
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
import unittest
|
||||
|
||||
from kaggle_environments.utils import structify
|
||||
|
||||
from ray.rllib.env.wrappers.kaggle_wrapper import \
|
||||
KaggleFootballMultiAgentEnv
|
||||
|
||||
|
||||
class TestKaggleFootballMultiAgentEnv(unittest.TestCase):
    """Smoke tests for KaggleFootballMultiAgentEnv."""

    def test_football_env(self):
        """One step with action 0 reports both agents in every output."""
        env = KaggleFootballMultiAgentEnv()
        obs = env.reset()
        self.assertEqual(list(obs), ["agent0", "agent1"])
        action_dict = {"agent0": 0, "agent1": 0}
        obs, reward, done, info = env.step(action_dict)
        self.assertEqual(list(obs), ["agent0", "agent1"])
        self.assertEqual(reward, {"agent0": 0, "agent1": 0})
        self.assertEqual(done, {
            "agent0": False,
            "agent1": False,
            "__all__": False,
        })
        self.assertEqual(info, {"agent0": {}, "agent1": {}})

    def test_football_env_run_30_steps(self):
        """Runs the env for at most 30 steps, driven by built-in agents."""
        max_steps = 30
        env = KaggleFootballMultiAgentEnv()

        # use the built-in agents in the kaggle environment
        run_right_agent = env.kaggle_env.agents["run_right"]
        do_nothing_agent = env.kaggle_env.agents["do_nothing"]

        obs = env.reset()
        self.assertEqual(list(obs), ["agent0", "agent1"])
        done = {"__all__": False}
        num_steps_completed = 0
        # Strict "<" so that exactly `max_steps` steps are taken at most
        # ("<=" would run one step more than the test name promises).
        while not done["__all__"] and num_steps_completed < max_steps:
            action0 = run_right_agent(structify(obs["agent0"]))[0]
            action1 = do_nothing_agent(structify(obs["agent1"]))[0]
            action_dict = {"agent0": action0, "agent1": action1}
            obs, _, done, _ = env.step(action_dict)
            num_steps_completed += 1

    def test_kaggle_football_agent_spaces(self):
        """Observations after reset and after a step lie in the obs space."""
        env = KaggleFootballMultiAgentEnv()
        obs = env.reset()
        action_space, obs_space = env.build_agent_spaces()
        self.assertTrue(obs_space.contains(obs["agent0"]))
        self.assertTrue(obs_space.contains(obs["agent1"]))

        action_dict = {
            "agent0": action_space.sample(),
            "agent1": action_space.sample(),
        }
        obs, _, _, _ = env.step(action_dict)
        self.assertTrue(obs_space.contains(obs["agent0"]))
        self.assertTrue(obs_space.contains(obs["agent1"]))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running this file directly hands it to pytest in verbose mode and
    # uses pytest's result as the process exit status.
    import sys

    import pytest

    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
|
||||
Reference in New Issue
Block a user