Source code for mlpy.experiments.task

from __future__ import division, print_function, absolute_import

import numpy as np
from abc import ABCMeta, abstractmethod

from ..mdp.stateaction import State


class Task(object):
    """Base class for all task descriptions.

    A task description tells the agent what it is supposed to do. It is
    also the place where :class:`.State` and :class:`.Action` are
    configured: the number of features, the description, and, if desired,
    runtime replacements for the static functions
    :func:`~mlpy.mdp.stateaction.State.is_valid`,
    :func:`~mlpy.mdp.stateaction.MDPPrimitive.encode`, and
    :func:`~mlpy.mdp.stateaction.MDPPrimitive.decode`.

    Parameters
    ----------
    env : Environment, optional
        The environment in which the agent performs the task.

    See Also
    --------
    :class:`EpisodicTask`, :class:`SearchTask`

    Notes
    -----
    Every task should derive from this class, directly or indirectly.
    Deriving classes are expected to override :meth:`_configure_state`
    and :meth:`_configure_action` in order to configure :class:`.State`
    and :class:`.Action`, respectively.

    For both :class:`.State` and :class:`.Action` the class variables are
    set through the following functions:

    * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
    * :func:`~mlpy.mdp.stateaction.State.set_dtype`
    * :func:`~mlpy.mdp.stateaction.State.set_description`
    * :func:`~mlpy.mdp.stateaction.State.set_discretized`
    * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
    * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

    More readable descriptions are obtained by overriding these
    :class:`.State` and :class:`.Action` methods:

    * :func:`~mlpy.mdp.stateaction.State.encode`
    * :func:`~mlpy.mdp.stateaction.State.decode`

    :class:`.State` additionally offers a validity check:

    * :func:`~mlpy.mdp.stateaction.State.is_valid`

    """

    def __init__(self, env=None):
        self._env = env

        # The configuration hooks run after the environment is stored but
        # before the bookkeeping flags below receive their defaults.
        self._configure_state()
        self._configure_action()

        self._is_episodic = False
        self._event_delay_on_term = 0.0
        self._request_termination = self._completed = False

    @property
    def is_episodic(self):
        """Whether the task is episodic.

        Returns
        -------
        bool :
            ``True`` for an episodic task, ``False`` otherwise.

        """
        return self._is_episodic

    @property
    def event_delay(self):
        """Event delay.

        The delay, in milliseconds (ms), applied to the fsm event once
        termination has been requested.

        Returns
        -------
        float :
            The delay in milliseconds.

        """
        return self._event_delay_on_term

    # noinspection PyUnusedLocal
    def reset(self, t, **kwargs):
        """Reset the task.

        Clears both the termination-requested flag and the completed flag.

        Parameters
        ----------
        t : float
            The current time (sec).
        kwargs : dict, optional
            Non-positional parameters.

        """
        self._completed = False
        self._request_termination = False

    def request_termination(self, value):
        """Set the termination-requested flag.

        Parameters
        ----------
        value : bool
            The new value of the termination-requested flag.

        """
        self._request_termination = value

    def termination_requested(self):
        """Report whether termination was requested.

        Returns
        -------
        bool :
            The current value of the termination-requested flag.

        """
        return self._request_termination

    def terminate(self, value):
        """Set the completed flag.

        Parameters
        ----------
        value : bool
            The new value of the completed flag.

        """
        self._completed = value

    def is_complete(self):
        """Report whether the task has completed.

        Returns
        -------
        bool :
            The current value of the completed flag.

        """
        return self._completed

    def sensation(self, **kwargs):
        """Gather the state feature information.

        Collects the state information (i.e. the features) relevant to
        the task from the agent's senses. The base class provides no
        implementation.

        Parameters
        ----------
        kwargs : dict
            Non-positional arguments needed for gathering the information.

        Returns
        -------
        features : array, shape (`nfeatures`,)
            The sensed features.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError()

    # noinspection PyUnusedLocal,PyMethodMayBeStatic
    def get_reward(self, state, action):
        """Retrieve the reward for a state-action pair.

        The base implementation ignores both arguments and yields
        ``None``; deriving classes supply the actual reward.

        Parameters
        ----------
        state : State
            The current state.
        action : Action
            The current action.

        Returns
        -------
        float :
            The reward (``None`` in this base implementation).

        """
        return None

    # noinspection PyMethodMayBeStatic
    def _configure_state(self):
        """Configure :class:`.State`.

        Hook invoked by the constructor; it does nothing in the base
        class.

        Notes
        -----
        The class variables are set through the following functions:

        * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
        * :func:`~mlpy.mdp.stateaction.State.set_dtype`
        * :func:`~mlpy.mdp.stateaction.State.set_description`
        * :func:`~mlpy.mdp.stateaction.State.set_discretized`
        * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
        * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

        Override the following methods for more readable descriptions
        and to validate the state:

        * :func:`~mlpy.mdp.stateaction.State.encode`
        * :func:`~mlpy.mdp.stateaction.State.decode`
        * :func:`~mlpy.mdp.stateaction.State.is_valid`

        """
        pass

    # noinspection PyMethodMayBeStatic
    def _configure_action(self):
        """Configure :class:`.Action`.

        Hook invoked by the constructor; it does nothing in the base
        class.

        Notes
        -----
        The class variables are set through the following functions:

        * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
        * :func:`~mlpy.mdp.stateaction.State.set_dtype`
        * :func:`~mlpy.mdp.stateaction.State.set_description`
        * :func:`~mlpy.mdp.stateaction.State.set_discretized`
        * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
        * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

        Override the following methods for more readable descriptions:

        * :func:`~mlpy.mdp.stateaction.State.encode`
        * :func:`~mlpy.mdp.stateaction.State.decode`

        """
        pass
# noinspection PyAbstractClass
class EpisodicTask(Task):
    """The episodic task description base class.

    This class automatically identifies the task as an episodic task. An
    episodic task has a set of actions that transitions the agent into a
    terminal state. Once a terminal state is reached the task is complete.

    Parameters
    ----------
    initial_states : str or State or list[str or State]
        List of possible initial states.
    terminal_states : str or State or list[str or State]
        List of terminal states.
    env : Environment, optional
        The environment in which the agent performs the task.

    Notes
    -----
    Every deriving class must overwrite the methods
    :meth:`_configure_state` and :meth:`_configure_action` to configure
    the classes :class:`.State` and :class:`.Action`, respectively.

    For both :class:`.State` and :class:`.Action` the appropriate class
    variables can be set by calling the following functions:

    * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
    * :func:`~mlpy.mdp.stateaction.State.set_dtype`
    * :func:`~mlpy.mdp.stateaction.State.set_description`
    * :func:`~mlpy.mdp.stateaction.State.set_discretized`
    * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
    * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

    Overwrite the following :class:`.State` and :class:`.Action` methods
    to allow for more readable descriptions:

    * :func:`~mlpy.mdp.stateaction.State.encode`
    * :func:`~mlpy.mdp.stateaction.State.decode`

    Additionally, the :class:`.State` class provides a method to check a
    state's validity. Overwrite this function to specify valid states:

    * :func:`~mlpy.mdp.stateaction.State.is_valid`

    The constructor stores ``initial_states`` and ``terminal_states`` as
    class attributes on :class:`.State`, so they are shared by every task
    instance rather than being kept per task.

    """

    def __init__(self, initial_states, terminal_states, env=None):
        super(EpisodicTask, self).__init__(env)

        self._is_episodic = True

        # Stored on the State class itself (not on this instance).
        State.initial_states = initial_states
        State.terminal_states = terminal_states

    @staticmethod
    def random_initial_state():
        """Return a random initial state.

        When ``State.initial_states`` is a list, one of its elements is
        drawn uniformly at random; any other value is treated as the
        single initial state and returned unchanged.

        Returns
        -------
        str or State :
            A random initial state.

        Raises
        ------
        ValueError
            If ``State.initial_states`` is an empty list.

        """
        candidates = State.initial_states

        # Only a list counts as a collection of candidates; a tuple, for
        # instance, may itself be a single state.
        if isinstance(candidates, list):
            # Draw an index instead of passing the list to
            # ``np.random.choice``: that function first coerces its
            # argument to a 1-D array, which fails for candidates that
            # convert to more than one dimension and otherwise hands back
            # NumPy scalar copies rather than the stored objects.
            return candidates[np.random.randint(len(candidates))]

        return candidates
# noinspection PyAbstractClass
class SearchTask(EpisodicTask):
    """Abstract base class for search task definitions.

    Parameters
    ----------
    initial_states : str or State or list[str or State]
        List of possible initial states.
    terminal_states : str or State or list[str or State], optional
        List of terminal states.
    env : Environment, optional
        The environment in which the agent performs the task.

    """
    # NOTE: Python 2 style metaclass declaration. Python 3 ignores the
    # ``__metaclass__`` attribute, so there the ``@abstractmethod`` marker
    # below does not prevent instantiation.
    __metaclass__ = ABCMeta

    def __init__(self, initial_states, terminal_states=None, env=None):
        super(SearchTask, self).__init__(
            initial_states=initial_states,
            terminal_states=terminal_states,
            env=env,
        )

    @abstractmethod
    def get_successor(self, state):
        """Find valid successors.

        Determines every valid successor (state-action pair) reachable
        from the given ``state``.

        Parameters
        ----------
        state : int or tuple[int]
            The state from which to find successors.

        Returns
        -------
        list[tuple(str, str or tuple[str])] :
            A list of all successors.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError()

    @staticmethod
    def get_path_cost(c, _):
        """Return the cost of the current path after one more step.

        Every step adds a unit cost; the second argument is ignored.

        Parameters
        ----------
        c : float
            The current cost for the path.
        _ : object
            Unused.

        Returns
        -------
        float :
            The updated cost.

        """
        updated_cost = c + 1
        return updated_cost