From 8af9ff6dc24f526045d28fdd447b0d8de87f93ad Mon Sep 17 00:00:00 2001
From: desktable
Date: Thu, 8 Oct 2020 10:57:58 -0700
Subject: [PATCH] [RLlib] Add MultiAgentEnv wrapper for Kaggle's football environment (#11249)

* [RLlib] Add MultiAgentEnv wrapper for Kaggle's football environment

* Add unit tests to BUILD

* Add gfootball dependency

* Revert the last two commits
---
 rllib/env/wrappers/kaggle_wrapper.py          | 193 ++++++++++++++++++
 .../env/wrappers/tests/test_kaggle_wrapper.py |  65 +++++++
 2 files changed, 258 insertions(+)
 create mode 100644 rllib/env/wrappers/kaggle_wrapper.py
 create mode 100644 rllib/env/wrappers/tests/test_kaggle_wrapper.py

diff --git a/rllib/env/wrappers/kaggle_wrapper.py b/rllib/env/wrappers/kaggle_wrapper.py
new file mode 100644
index 000000000..4586aa16a
--- /dev/null
+++ b/rllib/env/wrappers/kaggle_wrapper.py
@@ -0,0 +1,193 @@
+"""Wrap Kaggle's environment
+
+Source: https://github.com/Kaggle/kaggle-environments
+"""
+
+from copy import deepcopy
+from typing import Any, Dict, Optional, Tuple
+
+import kaggle_environments
+import numpy as np
+from gym.spaces import Box
+from gym.spaces import Dict as DictSpace
+from gym.spaces import Discrete, MultiBinary, MultiDiscrete, Space
+from gym.spaces import Tuple as TupleSpace
+
+from ray.rllib.env import MultiAgentEnv
+from ray.rllib.utils.typing import MultiAgentDict, AgentID
+
+
+class KaggleFootballMultiAgentEnv(MultiAgentEnv):
+    """An interface to Kaggle's football environment.
+
+    Agents are named "agent0", "agent1", ... after their index in the kaggle
+    environment's state list.
+
+    See: https://github.com/Kaggle/kaggle-environments
+    """
+
+    def __init__(self, configuration: Optional[Dict[str, Any]] = None) -> None:
+        """Initializes a Kaggle football environment.
+
+        Args:
+            configuration (Optional[Dict[str, Any]]): configuration of the
+                football environment. For detailed information, see:
+                https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/football.json
+        """  # noqa: E501
+        self.kaggle_env = kaggle_environments.make(
+            "football", configuration=configuration or {})
+        # Cumulative rewards recorded by the previous step() call; None until
+        # the first step of an episode.
+        self.last_cumulative_reward = None
+
+    def reset(self) -> MultiAgentDict:
+        """Resets the kaggle environment and starts a new episode.
+
+        Returns:
+            The converted observations of all ACTIVE agents, keyed by agent
+            id.
+        """
+        kaggle_state = self.kaggle_env.reset()
+        self.last_cumulative_reward = None
+        return {
+            f"agent{idx}": self._convert_obs(agent_state["observation"])
+            for idx, agent_state in enumerate(kaggle_state)
+            if agent_state["status"] == "ACTIVE"
+        }
+
+    def step(
+            self, action_dict: Dict[AgentID, int]
+    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
+        """Advances the environment by one step.
+
+        Args:
+            action_dict (Dict[AgentID, int]): one action per ACTIVE agent,
+                keyed by agent id.
+
+        Returns:
+            A tuple (obs, reward, done, info) of dicts keyed by agent id.
+            `obs` only holds ACTIVE agents, `reward` holds the reward gained
+            during this step and `done` also holds the "__all__" key.
+
+        Raises:
+            KeyError: if `action_dict` lacks an action for an ACTIVE agent.
+        """
+        # Convert action_dict (used by RLlib) to a list of actions (used by
+        # kaggle_environments)
+        action_list = [None] * len(self.kaggle_env.state)
+        for idx, agent_state in enumerate(self.kaggle_env.state):
+            if agent_state["status"] == "ACTIVE":
+                action = action_dict[f"agent{idx}"]
+                action_list[idx] = [action]
+        self.kaggle_env.step(action_list)
+
+        # Parse (obs, reward, done, info) from kaggle's "state" representation
+        obs = {}
+        cumulative_reward = {}
+        done = {"__all__": self.kaggle_env.done}
+        info = {}
+        for idx, agent_state in enumerate(self.kaggle_env.state):
+            agent_name = f"agent{idx}"
+            if agent_state["status"] == "ACTIVE":
+                obs[agent_name] = self._convert_obs(agent_state["observation"])
+            cumulative_reward[agent_name] = agent_state["reward"]
+            done[agent_name] = agent_state["status"] != "ACTIVE"
+            info[agent_name] = agent_state["info"]
+        # Compute the step rewards from the cumulative rewards
+        if self.last_cumulative_reward is not None:
+            reward = {
+                agent_id: agent_reward - self.last_cumulative_reward[agent_id]
+                for agent_id, agent_reward in cumulative_reward.items()
+            }
+        else:
+            # First step of an episode: return a copy, so that a caller that
+            # mutates the reward dict cannot corrupt the stored baseline.
+            reward = dict(cumulative_reward)
+        self.last_cumulative_reward = cumulative_reward
+        return obs, reward, done, info
+
+    def _convert_obs(self, obs: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert raw observations
+
+        These conversions are necessary to make the observations fall into the
+        observation space defined below.
+
+        Args:
+            obs (Dict[str, Any]): raw observation of one agent. It is left
+                untouched; a deep copy is converted instead.
+
+        Returns:
+            The converted copy of `obs`.
+        """
+        new_obs = deepcopy(obs)
+        raw = new_obs["players_raw"][0]
+        # Discrete spaces cannot hold -1, so remap it to the extra last value
+        # of Discrete(3) / Discrete(num_players + 1).
+        if raw["ball_owned_team"] == -1:
+            raw["ball_owned_team"] = 2
+        if raw["ball_owned_player"] == -1:
+            raw["ball_owned_player"] = 11
+        # Wrap the scalar in a list to match the Box of shape (1, ).
+        raw["steps_left"] = [raw["steps_left"]]
+        return new_obs
+
+    def build_agent_spaces(self) -> Tuple[Space, Space]:
+        """Construct the action and observation spaces
+
+        Description of actions and observations:
+        https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
+
+        Returns:
+            A tuple (action_space, observation_space) for a single agent.
+        """  # noqa: E501
+        action_space = Discrete(19)
+        # The football field's corners are [+-1., +-0.42]. However, the players
+        # and balls may get out of the field. Thus we multiply those limits by
+        # a factor of 2.
+        xlim = 1. * 2
+        ylim = 0.42 * 2
+        num_players: int = 11
+        xy_space = Box(
+            np.array([-xlim, -ylim], dtype=np.float32),
+            np.array([xlim, ylim], dtype=np.float32))
+        xyz_space = Box(
+            np.array([-xlim, -ylim, 0], dtype=np.float32),
+            np.array([xlim, ylim, np.inf], dtype=np.float32))
+        observation_space = DictSpace({
+            "controlled_players": Discrete(2),
+            "players_raw": TupleSpace([
+                DictSpace({
+                    # ball information
+                    "ball": xyz_space,
+                    "ball_direction": Box(-np.inf, np.inf, (3, )),
+                    "ball_rotation": Box(-np.inf, np.inf, (3, )),
+                    "ball_owned_team": Discrete(3),
+                    "ball_owned_player": Discrete(num_players + 1),
+                    # left team
+                    "left_team": TupleSpace([xy_space] * num_players),
+                    "left_team_direction": TupleSpace(
+                        [xy_space] * num_players),
+                    "left_team_tired_factor": Box(0., 1., (num_players, )),
+                    "left_team_yellow_card": MultiBinary(num_players),
+                    "left_team_active": MultiBinary(num_players),
+                    "left_team_roles": MultiDiscrete([10] * num_players),
+                    # right team
+                    "right_team": TupleSpace([xy_space] * num_players),
+                    "right_team_direction": TupleSpace(
+                        [xy_space] * num_players),
+                    "right_team_tired_factor": Box(0., 1., (num_players, )),
+                    "right_team_yellow_card": MultiBinary(num_players),
+                    "right_team_active": MultiBinary(num_players),
+                    "right_team_roles": MultiDiscrete([10] * num_players),
+                    # controlled player information
+                    "active": Discrete(num_players),
+                    "designated": Discrete(num_players),
+                    "sticky_actions": MultiBinary(10),
+                    # match state
+                    "score": Box(-np.inf, np.inf, (2, )),
+                    "steps_left": Box(0, np.inf, (1, )),
+                    "game_mode": Discrete(7)
+                })
+            ])
+        })
+        return action_space, observation_space
diff --git a/rllib/env/wrappers/tests/test_kaggle_wrapper.py b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
new file mode 100644
index 000000000..56300cbc7
--- /dev/null
+++ b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
@@ -0,0 +1,65 @@
+import unittest
+
+from kaggle_environments.utils import structify
+
+from ray.rllib.env.wrappers.kaggle_wrapper import \
+    KaggleFootballMultiAgentEnv
+
+
+class TestKaggleFootballMultiAgentEnv(unittest.TestCase):
+    def test_football_env(self):
+        """A single step yields well-formed obs/reward/done/info dicts."""
+        env = KaggleFootballMultiAgentEnv()
+        obs = env.reset()
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        action_dict = {"agent0": 0, "agent1": 0}
+        obs, reward, done, info = env.step(action_dict)
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        self.assertEqual(reward, {"agent0": 0, "agent1": 0})
+        self.assertEqual(done, {
+            "agent0": False,
+            "agent1": False,
+            "__all__": False,
+        })
+        self.assertEqual(info, {"agent0": {}, "agent1": {}})
+
+    def test_football_env_run_30_steps(self):
+        """The built-in kaggle agents can drive the env for 30 steps."""
+        env = KaggleFootballMultiAgentEnv()
+
+        # use the built-in agents in the kaggle environment
+        run_right_agent = env.kaggle_env.agents["run_right"]
+        do_nothing_agent = env.kaggle_env.agents["do_nothing"]
+
+        obs = env.reset()
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        done = {"__all__": False}
+        num_steps_completed = 0
+        while not done["__all__"] and num_steps_completed < 30:
+            action0 = run_right_agent(structify(obs["agent0"]))[0]
+            action1 = do_nothing_agent(structify(obs["agent1"]))[0]
+            action_dict = {"agent0": action0, "agent1": action1}
+            obs, _, done, _ = env.step(action_dict)
+            num_steps_completed += 1
+
+    def test_kaggle_football_agent_spaces(self):
+        """Observations from reset() and step() fit the declared space."""
+        env = KaggleFootballMultiAgentEnv()
+        obs = env.reset()
+        action_space, obs_space = env.build_agent_spaces()
+        self.assertTrue(obs_space.contains(obs["agent0"]))
+        self.assertTrue(obs_space.contains(obs["agent1"]))
+
+        action_dict = {
+            "agent0": action_space.sample(),
+            "agent1": action_space.sample(),
+        }
+        obs, _, _, _ = env.step(action_dict)
+        self.assertTrue(obs_space.contains(obs["agent0"]))
+        self.assertTrue(obs_space.contains(obs["agent1"]))
+
+
+if __name__ == "__main__":
+    import sys
+    import pytest
+    sys.exit(pytest.main(["-v", __file__]))