Module ai.simulators
Module containing the abstract definition of a simulator, as well as several implementations of it.
Expand source code
"""Module containing the abstract definition of a simulator, as well as several
implementations of it."""
from . import action_spaces
from ._base import Base
from ._factory import Factory
from ._connect_four import ConnectFour
from ._tictactoe import TicTacToe
from ._grid import Grid
from ._cart_pole import CartPole
__all__ = [
"TicTacToe",
"Base",
"ConnectFour",
"Factory",
"action_spaces",
"Grid",
"CartPole",
]
Sub-modules
ai.simulators.action_spaces
-
Action spaces for simulators.
Classes
class Base (deterministic: bool)
-
Simulator base class.
A simulator, as opposed to an environment, executes actions based on a given state, rather than an internally tracked state.
Args
deterministic
:bool
- Flag indicating if this simulator instance is considered deterministic or not.
Ancestors
- abc.ABC
Subclasses
Static methods
def get_factory(*args, **kwargs) ‑> Factory
-
Creates and returns a factory object that spawns simulators when called.
Args and kwargs are passed along to the class constructor. However, if other behavior is required, feel free to override this method and return a factory class of your choice.
Expand source code
@classmethod
def get_factory(cls, *args, **kwargs) -> "simulators.Factory":
    """Build a factory object that spawns simulators of this class when called.

    Positional and keyword arguments are forwarded to the class constructor.
    Override this method to return a different factory type if other behavior
    is required.
    """
    factory = simulators.Factory(cls, *args, **kwargs)
    return factory
Instance variables
var action_space : action_spaces.Base
-
The action space class used by this simulator.
Expand source code
@property
@abstractmethod
def action_space(self) -> simulators.action_spaces.Base:
    """The action space class used by this simulator.

    Concrete simulator subclasses must override this property.
    """
    raise NotImplementedError
var deterministic : bool
-
Whether or not the simulator instance is deterministic or stochastic.
Expand source code
@property
def deterministic(self) -> bool:
    """Whether or not the simulator instance is deterministic or stochastic.

    Reflects the flag stored on the instance at construction time.
    """
    return self._deterministic
Methods
def close(self)
-
Disposes resources used by the simulator.
Expand source code
@abstractmethod
def close(self):
    """Disposes resources used by the simulator.

    Concrete simulator subclasses must implement this method.
    """
    raise NotImplementedError
def render(self, state: numpy.ndarray)
-
Renders the given state.
Expand source code
@abstractmethod
def render(self, state: np.ndarray):
    """Renders the given state.

    Concrete simulator subclasses must implement this method.
    """
    raise NotImplementedError
def reset(self) ‑> numpy.ndarray
-
Provides a single new environment state.
Returns
np.ndarray
- Initial state
Expand source code
def reset(self) -> np.ndarray:
    """Provides a single new environment state.

    Returns:
        np.ndarray: Initial state
    """
    # Delegate to the bulk variant and unwrap the single generated state.
    initial_states = self.reset_bulk(1)
    return initial_states[0]
def reset_bulk(self, n: int) ‑> numpy.ndarray
-
Provides multiple new environment states.
Args
n
:int
- Number of states to generate.
Returns
np.ndarray
- Initial states, stacked in the first dimension.
Expand source code
@abstractmethod
def reset_bulk(self, n: int) -> np.ndarray:
    """Provides multiple new environment states.

    Args:
        n (int): Number of states to generate.

    Returns:
        np.ndarray: Initial states, stacked in the first dimension.
    """
    raise NotImplementedError
def step(self, state: numpy.ndarray, action: int) ‑> Tuple[numpy.ndarray, float, bool, Dict]
-
Executes one step in the environment.
Args
state
:np.ndarray
- State
action
:int
- Action index
Returns
Tuple[np.ndarray, float, bool, Dict]
- Tuple of next state, reward, terminal
flag, and debugging dictionary.
Expand source code
def step(
    self, state: np.ndarray, action: int
) -> Tuple[np.ndarray, float, bool, Dict]:
    """Executes one step in the environment.

    Args:
        state (np.ndarray): State
        action (int): Action index

    Returns:
        Tuple[np.ndarray, float, bool, Dict]: Tuple of next state, reward,
            terminal flag, and debugging dictionary.
    """
    # Wrap the single state/action into batch form, run the bulk step,
    # then unwrap the first (and only) element of each returned batch.
    batched_state = np.expand_dims(state, 0)
    batched_action = np.array([action])
    next_states, rewards, terminals, infos = self.step_bulk(
        batched_state, batched_action
    )
    return next_states[0], rewards[0], terminals[0], infos[0]
def step_bulk(self, states: numpy.ndarray, actions: numpy.ndarray) ‑> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, List[Dict]]
-
Executes a bulk of actions in multiple states.
Args
states
:np.ndarray
- States, in batch format.
actions
:np.ndarray
- Integer vector of action indices.
Returns
Tuple[np.ndarray, np.ndarray, np.ndarray, List[Dict]]
- Tuple of
next states, rewards, terminal flags, and debugging dictionaries.
Expand source code
@abstractmethod
def step_bulk(
    self, states: np.ndarray, actions: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[Dict]]:
    """Executes a bulk of actions in multiple states.

    Args:
        states (np.ndarray): States, in batch format.
        actions (np.ndarray): Integer vector of action indices.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, List[Dict]]: Tuple of next
            states, rewards, terminal flags, and debugging dictionaries.
    """
    raise NotImplementedError
class CartPole
-
Simulator implementation of the inverted pendulum (aka CartPole).
State is given by a vector in R^5 with elements indicating cart position, cart velocity, pole angular deviation from center, pole angular velocity, and steps made.
Action space is discrete and of size two. Action 0 applies a force in the positive direction (positive relative to cart position), and action 1 negative.
Rewards are given as 1 for each step. Episodes are terminated when either 200 steps have passed, or the cart position is greater than 2.4 from the start location, or the pole deviation is greater than 12 degrees.
This simulator is based on https://github.com/openai/gym/blob/a5a6ae6bc0a5cfc0ff1ce9be723d59593c165022/gym/envs/classic_control/cartpole.py # noqa
Args
deterministic
:bool
- Flag indicating if this simulator instance is considered deterministic or not.
Ancestors
- Base
- abc.ABC
Class variables
var ActionSpace
-
Discrete action space.
Discrete action spaces identify actions using an integer and have a fixed size. Moreover, not all actions are necessarily legal in every state. Legal actions are given by the action mask, a boolean vector whose elements are
True
at legal action indices andFalse
at illegal action indices.
Inherited members
class ConnectFour
-
Connect four (four in a row) game simulator.
States are given by a single
np.ndarray
of shape(43, )
. The first 42 elements denote the game board in row-major order (board is of shape(6, 7)
). Each board element is in{-1, 0, 1}
, where-1
and1
denote occupied cells and0
empty cells. The last element in the state vector (i.e. element index 42) is either+1
or-1
, denoting the player who is about to play.Actions are discrete in
{0, 1, ..., 6}
, denoting, from the left, which column to place the next marker in. Rewards are given at the end of a game round, i.e. intermediate rewards are zero. A winning action is rewarded with
+1
and a losing action is rewarded with-1
.Args
deterministic
:bool
- Flag indicating if this simulator instance is considered deterministic or not.
Ancestors
- Base
- abc.ABC
Class variables
var ActionSpace
-
Action space of the ConnectFour simulator.
Methods
def render(self, state: numpy.ndarray, output_fn: Callable[[str], None] = <built-in function print>)
-
Renders the game board to a string and then outputs it to the given output function.
Args
state
:np.ndarray
- State to render
output_fn
:Callable[[str], None]
, optional- Output function, accepting one
string argument. Defaults to
print
.Expand source code
def render(self, state: np.ndarray, output_fn: Callable[[str], None] = print):
    """Renders the game board to a string and then outputs it to the given
    output function.

    Args:
        state (np.ndarray): State to render
        output_fn (Callable[[str], None], optional): Output function, accepting
            one string argument. Defaults to `print`.
    """

    def cell_symbol(value) -> str:
        # Board cells are expected to hold -1, 0, or 1.
        symbol = {0: " ", 1: "X", -1: "O"}.get(value)
        if symbol is None:
            raise ValueError(f"Unexpected value {value}")
        return symbol

    # Column index header first, then the six board rows in row-major order.
    output_fn(" | ".join(str(col) for col in range(7)))
    for row in range(6):
        output_fn(
            " | ".join(cell_symbol(state[7 * row + col]) for col in range(7))
        )
Inherited members
class Factory (cls: Type[Base], *args, **kwargs)
-
Factories are callable objects that spawn simulator instances.
Args
cls
:Type[simulators.Base]
- Simulator class.
*args, **kwargs
: arguments and key-word arguments passed to the simulator__init__
method. class Grid (dim: int, sizes: Sequence[int])
-
Simple grid navigation environment. Agents can move in either direction in all dimensions and need to reach a goal state.
States are given by two vectors, specifying the grid coordinates of the agent and goal respectively. A grid of dimension
N
therefore has a state shape of(2, N)
. On reset, start state and goal state are sampled uniformly across the grid. Steps can be taken in one dimension only, and the action space is discrete with size2N
. Action indices2k
moves the agent in the positive direction of dimensionk
, and2k+1
in the negative direction. Rewards are given as 1 whenever the goal is reached, otherwise 0.Args
dim
:int
- Dimension of the grid world.
sizes
:Sequence[int]
- Sizes of each dimension.
Ancestors
- Base
- abc.ABC
Inherited members
class TicTacToe
-
TicTacToe (connect three, or three in a row) simulator.
States are given by a single
np.ndarray
of shape(10, )
. The first 9 elements denote the game board in row-major order (board is of shape(3, 3)
). Each board element is in{-1, 0, 1}
, where-1
and1
denote occupied cells and0
empty cells. The last element in the state vector (i.e. element index 9) is either+1
or-1
, denoting the player who is about to play. Actions are discrete in
{0, 1, ..., 8}
, denoting, in row-major order, which cell to place the next marker in. Rewards are given at the end of a game round, i.e. intermediate rewards are zero. A winning action is rewarded with
+1
and a losing action is rewarded with-1
.Args
deterministic
:bool
- Flag indicating if this simulator instance is considered deterministic or not.
Ancestors
- Base
- abc.ABC
Class variables
var ActionSpace
-
Action space for the TicTacToe simulator.
Methods
def render(self, state: numpy.ndarray, output_fn: Callable[[str], None] = <built-in function print>)
-
Renders the game board and action index map to a string that is then output through the given output function.
Args
state
:np.ndarray
- State to render
output_fn
:Callable[[str], None]
, optional- Output function, called with
the generated string. Defaults to
print
.Expand source code
# NOTE(review): this is a doc-extraction-collapsed rendering of the original
# method — the multi-line f-string's exact line layout (three board rows, each
# paired with an action-index map row) was flattened and cannot be faithfully
# re-indented from here; edit against the real source file. Cell values map
# 0->" ", 1->"X", -1->"O"; any other value raises ValueError via tile().
def render(self, state: np.ndarray, output_fn: Callable[[str], None] = print): """Renders the game board and action index map to a string that is then output through the given output function. Args: state (np.ndarray): State to render output_fn (Callable[[str], None], optional): Output function, called with the generated string. Defaults to `print`. """ def tile(value) -> str: if value == 0: return " " elif value == 1: return "X" elif value == -1: return "O" else: raise ValueError(f"Unexpected value {value}") output_fn( f""" | --- | --- | --- | | --- | --- | --- | | {tile(state[0])} | {tile(state[1])} | {tile(state[2])} | | 0 | 1 | 2 | | --- | --- | --- | | --- | --- | --- | | {tile(state[3])} | {tile(state[4])} | {tile(state[5])} | | 3 | 4 | 5 | | --- | --- | --- | | --- | --- | --- | | {tile(state[6])} | {tile(state[7])} | {tile(state[8])} | | 6 | 7 | 8 | | --- | --- | --- | | --- | --- | --- | """ )
Inherited members