From 8af9ff6dc24f526045d28fdd447b0d8de87f93ad Mon Sep 17 00:00:00 2001
From: desktable <desktable@users.noreply.github.com>
Date: Thu, 8 Oct 2020 10:57:58 -0700
Subject: [PATCH] [RLlib] Add MultiAgentEnv wrapper for Kaggle's football
 environment (#11249)

* [RLlib] Add MultiAgentEnv wrapper for Kaggle's football environment

* Add unit tests to BUILD

* Add gfootball dependency

* Revert the last two commits
---
 rllib/env/wrappers/kaggle_wrapper.py          | 155 ++++++++++++++++++
 .../env/wrappers/tests/test_kaggle_wrapper.py |  62 +++++++
 2 files changed, 217 insertions(+)
 create mode 100644 rllib/env/wrappers/kaggle_wrapper.py
 create mode 100644 rllib/env/wrappers/tests/test_kaggle_wrapper.py

diff --git a/rllib/env/wrappers/kaggle_wrapper.py b/rllib/env/wrappers/kaggle_wrapper.py
new file mode 100644
index 000000000..4586aa16a
--- /dev/null
+++ b/rllib/env/wrappers/kaggle_wrapper.py
@@ -0,0 +1,155 @@
+"""Wrap Kaggle's football environment as an RLlib MultiAgentEnv.
+
+Source: https://github.com/Kaggle/kaggle-environments
+"""
+
+from copy import deepcopy
+from typing import Any, Dict, Optional, Tuple
+
+import kaggle_environments
+import numpy as np
+from gym.spaces import Box
+from gym.spaces import Dict as DictSpace
+from gym.spaces import Discrete, MultiBinary, MultiDiscrete, Space
+from gym.spaces import Tuple as TupleSpace
+
+from ray.rllib.env import MultiAgentEnv
+from ray.rllib.utils.typing import MultiAgentDict, AgentID
+
+
+class KaggleFootballMultiAgentEnv(MultiAgentEnv):
+    """An RLlib MultiAgentEnv interface to Kaggle's football environment.
+
+    Agents are named "agent0", "agent1", ... by their index in kaggle's state.
+    See: https://github.com/Kaggle/kaggle-environments
+    """
+
+    def __init__(self, configuration: Optional[Dict[str, Any]] = None) -> None:
+        """Initializes a Kaggle football environment.
+
+        Args:
+            configuration (Optional[Dict[str, Any]]): configuration of the
+                football environment. For detailed information, see:
+                https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/football.json
+        """
+        self.kaggle_env = kaggle_environments.make(
+            "football", configuration=configuration or {})
+        # Rewards seen at the previous step; None until the first step
+        self.last_cumulative_reward = None
+
+    def reset(self) -> MultiAgentDict:
+        """Resets the game; returns the observations of all active agents."""
+        kaggle_state = self.kaggle_env.reset()
+        self.last_cumulative_reward = None
+        return {
+            f"agent{idx}": self._convert_obs(agent_state["observation"])
+            for idx, agent_state in enumerate(kaggle_state)
+            if agent_state["status"] == "ACTIVE"
+        }
+
+    def step(
+            self, action_dict: Dict[AgentID, int]
+    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
+        """Steps the game; returns per-agent (obs, reward, done, info) dicts.
+
+        Rewards are the per-step changes in kaggle's cumulative rewards.
+        """
+        # Convert action_dict (used by RLlib) to a list of actions (used by
+        # kaggle_environments)
+        action_list = [None] * len(self.kaggle_env.state)
+        for idx, agent_state in enumerate(self.kaggle_env.state):
+            if agent_state["status"] == "ACTIVE":
+                action_list[idx] = [action_dict[f"agent{idx}"]]
+        self.kaggle_env.step(action_list)
+
+        # Parse (obs, reward, done, info) from kaggle's "state" representation
+        obs = {}
+        cumulative_reward = {}
+        done = {"__all__": self.kaggle_env.done}
+        info = {}
+        for idx, agent_state in enumerate(self.kaggle_env.state):
+            agent_name = f"agent{idx}"
+            if agent_state["status"] == "ACTIVE":
+                obs[agent_name] = self._convert_obs(agent_state["observation"])
+            cumulative_reward[agent_name] = agent_state["reward"]
+            done[agent_name] = agent_state["status"] != "ACTIVE"
+            info[agent_name] = agent_state["info"]
+        # Compute the step rewards from the cumulative rewards
+        last = self.last_cumulative_reward
+        if last is None:
+            # Copy so callers cannot mutate the stored cumulative rewards
+            reward = dict(cumulative_reward)
+        else:
+            reward = {k: r - last[k] for k, r in cumulative_reward.items()}
+        self.last_cumulative_reward = cumulative_reward
+        return obs, reward, done, info
+
+    def _convert_obs(self, obs: Dict[str, Any]) -> Dict[str, Any]:
+        """Returns a deep copy of obs that fits the observation space.
+
+        Maps ball_owned_team -1 -> 2 and ball_owned_player -1 -> 11, and wraps
+        steps_left in a list (see build_agent_spaces for the spaces).
+        """
+        new_obs = deepcopy(obs)
+        raw = new_obs["players_raw"][0]
+        if raw["ball_owned_team"] == -1:
+            raw["ball_owned_team"] = 2
+        if raw["ball_owned_player"] == -1:
+            raw["ball_owned_player"] = 11
+        raw["steps_left"] = [raw["steps_left"]]
+        return new_obs
+
+    def build_agent_spaces(self) -> Tuple[Space, Space]:
+        """Construct the (action_space, observation_space) of a single agent.
+
+        Description of actions and observations:
+        https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
+        """  # noqa: E501
+        action_space = Discrete(19)
+        # The football field's corners are [+-1., +-0.42]. However, the players
+        # and balls may get out of the field. Thus we multiply those limits by
+        # a factor of 2.
+        xlim = 1. * 2
+        ylim = 0.42 * 2
+        num_players: int = 11
+        xy_space = Box(
+            np.array([-xlim, -ylim], dtype=np.float32),
+            np.array([xlim, ylim], dtype=np.float32))
+        xyz_space = Box(
+            np.array([-xlim, -ylim, 0], dtype=np.float32),
+            np.array([xlim, ylim, np.inf], dtype=np.float32))
+        players_raw_space = DictSpace({
+            # ball information
+            "ball": xyz_space,
+            "ball_direction": Box(-np.inf, np.inf, (3, )),
+            "ball_rotation": Box(-np.inf, np.inf, (3, )),
+            "ball_owned_team": Discrete(3),
+            "ball_owned_player": Discrete(num_players + 1),
+            # left team
+            "left_team": TupleSpace([xy_space] * num_players),
+            "left_team_direction": TupleSpace([xy_space] * num_players),
+            "left_team_tired_factor": Box(0., 1., (num_players, )),
+            "left_team_yellow_card": MultiBinary(num_players),
+            "left_team_active": MultiBinary(num_players),
+            "left_team_roles": MultiDiscrete([10] * num_players),
+            # right team
+            "right_team": TupleSpace([xy_space] * num_players),
+            "right_team_direction": TupleSpace([xy_space] * num_players),
+            "right_team_tired_factor": Box(0., 1., (num_players, )),
+            "right_team_yellow_card": MultiBinary(num_players),
+            "right_team_active": MultiBinary(num_players),
+            "right_team_roles": MultiDiscrete([10] * num_players),
+            # controlled player information
+            "active": Discrete(num_players),
+            "designated": Discrete(num_players),
+            "sticky_actions": MultiBinary(10),
+            # match state
+            "score": Box(-np.inf, np.inf, (2, )),
+            "steps_left": Box(0, np.inf, (1, )),
+            "game_mode": Discrete(7)
+        })
+        observation_space = DictSpace({
+            "controlled_players": Discrete(2),
+            "players_raw": TupleSpace([players_raw_space]),
+        })
+        return action_space, observation_space
diff --git a/rllib/env/wrappers/tests/test_kaggle_wrapper.py b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
new file mode 100644
index 000000000..56300cbc7
--- /dev/null
+++ b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
@@ -0,0 +1,62 @@
+import unittest
+
+from kaggle_environments.utils import structify
+
+from ray.rllib.env.wrappers.kaggle_wrapper import \
+    KaggleFootballMultiAgentEnv
+
+
+class TestKaggleFootballMultiAgentEnv(unittest.TestCase):
+    """Tests for the KaggleFootballMultiAgentEnv wrapper."""
+
+    def test_football_env(self):
+        """Checks the outputs of reset() and of a single step()."""
+        env = KaggleFootballMultiAgentEnv()
+        obs = env.reset()
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        action_dict = {"agent0": 0, "agent1": 0}
+        obs, reward, done, info = env.step(action_dict)
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        self.assertEqual(reward, {"agent0": 0, "agent1": 0})
+        self.assertEqual(
+            done, {"agent0": False, "agent1": False, "__all__": False})
+        self.assertEqual(info, {"agent0": {}, "agent1": {}})
+
+    def test_football_env_run_30_steps(self):
+        """Runs kaggle's built-in agents for at most 30 steps."""
+        env = KaggleFootballMultiAgentEnv()
+
+        # use the built-in agents in the kaggle environment
+        run_right_agent = env.kaggle_env.agents["run_right"]
+        do_nothing_agent = env.kaggle_env.agents["do_nothing"]
+
+        obs = env.reset()
+        self.assertEqual(list(obs.keys()), ["agent0", "agent1"])
+        done = {"__all__": False}
+        num_steps_completed = 0
+        # Cap the rollout at 30 steps; the episode may also end earlier
+        while not done["__all__"] and num_steps_completed < 30:
+            action0 = run_right_agent(structify(obs["agent0"]))[0]
+            action1 = do_nothing_agent(structify(obs["agent1"]))[0]
+            action_dict = {"agent0": action0, "agent1": action1}
+            obs, _, done, _ = env.step(action_dict)
+            num_steps_completed += 1
+
+    def test_kaggle_football_agent_spaces(self):
+        """Checks that observations lie in the built observation space."""
+        env = KaggleFootballMultiAgentEnv()
+        obs = env.reset()
+        action_space, obs_space = env.build_agent_spaces()
+        self.assertTrue(obs_space.contains(obs["agent0"]))
+        self.assertTrue(obs_space.contains(obs["agent1"]))
+
+        action_dict = {f"agent{i}": action_space.sample() for i in range(2)}
+        obs, _, _, _ = env.step(action_dict)
+        self.assertTrue(obs_space.contains(obs["agent0"]))
+        self.assertTrue(obs_space.contains(obs["agent1"]))
+
+
+if __name__ == "__main__":
+    # Run this file's tests directly and exit with pytest's status code
+    import pytest
+    raise SystemExit(pytest.main(["-v", __file__]))