from __future__ import division, print_function, absolute_import
import numpy as np
from abc import ABCMeta, abstractmethod
from ..mdp.stateaction import State
class Task(object):
    """The task description base class.

    A task description describes the task the agent is to perform. The task
    description allows to configure :class:`.State` and :class:`.Action` by
    setting the number of features, the description and by overwriting the
    static functions :func:`~mlpy.mdp.stateaction.State.is_valid`,
    :func:`~mlpy.mdp.stateaction.MDPPrimitive.encode`, and
    :func:`~mlpy.mdp.stateaction.MDPPrimitive.decode` at runtime.

    Parameters
    ----------
    env : Environment, optional
        The environment in which the agent performs the task.

    See Also
    --------
    :class:`EpisodicTask`, :class:`SearchTask`

    Notes
    -----
    Any task should inherit from this base class or any class deriving from
    this class. Deriving classes are expected to overwrite the methods
    :meth:`_configure_state` and :meth:`_configure_action` to configure the
    classes :class:`.State` and :class:`.Action`, respectively; the
    implementations provided here do nothing.

    Both hooks are invoked from the constructor *before* the task's
    internal flags are assigned, so an override must not read those flags,
    and any value it stores in them is reset by the constructor afterwards.

    For both :class:`.State` and :class:`.Action` the appropriate class
    variables can be set by calling the following functions:

    * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
    * :func:`~mlpy.mdp.stateaction.State.set_dtype`
    * :func:`~mlpy.mdp.stateaction.State.set_description`
    * :func:`~mlpy.mdp.stateaction.State.set_discretized`
    * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
    * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

    Overwrite the following :class:`.State` and :class:`.Action` methods to
    allow for more readable descriptions:

    * :func:`~mlpy.mdp.stateaction.State.encode`
    * :func:`~mlpy.mdp.stateaction.State.decode`

    Additionally, the :class:`.State` class provides a method to check a
    state's validity:

    * :func:`~mlpy.mdp.stateaction.State.is_valid`

    """

    @property
    def is_episodic(self):
        """Identifies if the task is episodic or not.

        Returns
        -------
        bool :
            Whether this task is episodic or not.

        """
        return self._is_episodic

    @property
    def event_delay(self):
        """Event delay.

        The time in milliseconds (ms) by which the fsm event is delayed
        once termination is requested.

        Returns
        -------
        float :
            The time in milliseconds.

        """
        return self._event_delay_on_term

    def __init__(self, env=None):
        self._env = env

        # The configuration hooks run first; the flags below do not exist
        # yet while the hooks execute.
        self._configure_state()
        self._configure_action()

        self._is_episodic = False
        self._request_termination = False
        self._completed = False
        self._event_delay_on_term = 0.0

    # noinspection PyUnusedLocal
    def reset(self, t, **kwargs):
        """Reset the task.

        Clears both the termination-requested flag and the completed flag.

        Parameters
        ----------
        t : float
            The current time (sec). Not used by this base implementation.
        kwargs : dict, optional
            Non-positional parameters. Not used by this base implementation.

        """
        self._request_termination = False
        self._completed = False

    def request_termination(self, value):
        """Request termination of the task.

        Parameters
        ----------
        value : bool
            The value to set the termination requested flag to.

        """
        self._request_termination = value

    def termination_requested(self):
        """Check if termination was requested.

        Returns
        -------
        bool :
            Whether termination was requested or not.

        """
        return self._request_termination

    def terminate(self, value):
        """Set the termination flag.

        Parameters
        ----------
        value : bool
            The value to set the termination flag to.

        """
        self._completed = value

    def is_complete(self):
        """Check if the task has completed.

        Returns
        -------
        bool :
            Whether the task has completed or not.

        """
        return self._completed

    def sensation(self, **kwargs):
        """Gather the state feature information.

        Gather the state information (i.e. features) according to
        the task from the agent's senses.

        Parameters
        ----------
        kwargs: dict
            Non-positional arguments needed for gathering the
            information.

        Returns
        -------
        features : array, shape (`nfeatures`,)
            The sensed features

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    # noinspection PyUnusedLocal,PyMethodMayBeStatic
    def get_reward(self, state, action):
        """Retrieve the reward.

        Retrieve the reward for the given state and action from
        the environment.

        Parameters
        ----------
        state : State
            The current state.
        action : Action
            The current action.

        Returns
        -------
        float or None :
            The reward. This base implementation ignores its arguments and
            always returns ``None``.

        """
        return None

    # noinspection PyMethodMayBeStatic
    def _configure_state(self):
        """Configure :class:`.State`.

        Called by the constructor. This base implementation does nothing.

        Notes
        -----
        The appropriate class variables can be set by calling the
        following functions:

        * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
        * :func:`~mlpy.mdp.stateaction.State.set_dtype`
        * :func:`~mlpy.mdp.stateaction.State.set_description`
        * :func:`~mlpy.mdp.stateaction.State.set_discretized`
        * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
        * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

        Overwrite the following methods to allow for more readable
        descriptions and to validate the state:

        * :func:`~mlpy.mdp.stateaction.State.encode`
        * :func:`~mlpy.mdp.stateaction.State.decode`
        * :func:`~mlpy.mdp.stateaction.State.is_valid`

        """
        pass

    # noinspection PyMethodMayBeStatic
    def _configure_action(self):
        """Configure :class:`.Action`.

        Called by the constructor. This base implementation does nothing.

        Notes
        -----
        The appropriate class variables can be set by calling the
        following functions:

        * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
        * :func:`~mlpy.mdp.stateaction.State.set_dtype`
        * :func:`~mlpy.mdp.stateaction.State.set_description`
        * :func:`~mlpy.mdp.stateaction.State.set_discretized`
        * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
        * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

        Overwrite the following methods to allow for more readable
        descriptions:

        * :func:`~mlpy.mdp.stateaction.State.encode`
        * :func:`~mlpy.mdp.stateaction.State.decode`

        """
        pass
# noinspection PyAbstractClass
class EpisodicTask(Task):
    """The episodic task description base class.

    This class automatically identifies the task as an episodic task.
    An episodic task has a set of actions that transitions the agent
    into a terminal state. Once a terminal state is reached the task
    is complete.

    Parameters
    ----------
    initial_states : str or State or list[str or State]
        List of possible initial states.
    terminal_states : str or State or list[str or State]
        List of terminal states.
    env : Environment, optional
        The environment in which the agent performs the task.

    Notes
    -----
    The initial and terminal states are stored as class attributes on
    :class:`.State` (``State.initial_states`` / ``State.terminal_states``),
    not on the task instance; constructing another episodic task
    overwrites them.

    Every deriving class must overwrite the methods :meth:`_configure_state`
    and :meth:`_configure_action` to configure the classes :class:`.State` and
    :class:`.Action`, respectively.

    For both :class:`.State` and :class:`.Action` the appropriate class
    variables can be set by calling the following functions:

    * :func:`~mlpy.mdp.stateaction.State.set_nfeatures`
    * :func:`~mlpy.mdp.stateaction.State.set_dtype`
    * :func:`~mlpy.mdp.stateaction.State.set_description`
    * :func:`~mlpy.mdp.stateaction.State.set_discretized`
    * :func:`~mlpy.mdp.stateaction.State.set_minmax_features`
    * :func:`~mlpy.mdp.stateaction.State.set_states_per_dim`

    Overwrite the following :class:`.State` and :class:`.Action` methods to
    allow for more readable descriptions:

    * :func:`~mlpy.mdp.stateaction.State.encode`
    * :func:`~mlpy.mdp.stateaction.State.decode`

    Additionally, the :class:`.State` class provides a method to check a
    state's validity. Overwrite this function to specify valid states:

    * :func:`~mlpy.mdp.stateaction.State.is_valid`

    """

    def __init__(self, initial_states, terminal_states, env=None):
        super(EpisodicTask, self).__init__(env)

        # The base constructor sets the flag to False; flip it afterwards.
        self._is_episodic = True

        # Assigned on the State class itself, not on this instance.
        State.initial_states = initial_states
        State.terminal_states = terminal_states

    @staticmethod
    def random_initial_state():
        """Return a random initial state.

        If ``State.initial_states`` is a list, one of its elements is drawn
        uniformly at random; otherwise the stored value is returned as is.

        Returns
        -------
        str or State :
            A random initial state.

        Raises
        ------
        ValueError
            If ``State.initial_states`` is an empty list.

        """
        candidates = State.initial_states
        if isinstance(candidates, list):
            # Draw an index instead of calling ``np.random.choice`` on the
            # list: ``choice`` converts the list to an ndarray first, which
            # hands back numpy scalars rather than the stored objects and
            # rejects lists whose elements are themselves array-like.
            return candidates[np.random.randint(len(candidates))]
        return candidates
# noinspection PyAbstractClass
class SearchTask(EpisodicTask):
    """The abstract class for a search task definition.

    Parameters
    ----------
    initial_states : str or State or list[str or State]
        List of possible initial states.
    terminal_states : str or State or list[str or State], optional
        List of terminal states. Defaults to ``None``.
    env : Environment, optional
        The environment in which the agent performs the task.

    """
    # Python 2 metaclass declaration. Python 3 treats this as an ordinary
    # class attribute, so the abstract-method check is only enforced on
    # Python 2; `get_successor` still raises NotImplementedError if called.
    __metaclass__ = ABCMeta

    def __init__(self, initial_states, terminal_states=None, env=None):
        super(SearchTask, self).__init__(initial_states, terminal_states, env)

    @abstractmethod
    def get_successor(self, state):
        """Find valid successors.

        Finds all valid successors (state-action pairs) for the given
        ``state``.

        Parameters
        ----------
        state : int or tuple[int]
            The state from which to find successors.

        Returns
        -------
        list[tuple(str, str or tuple[str])] :
            A list of all successor.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    @staticmethod
    def get_path_cost(c, _):
        """Returns the cost for the current path.

        This default implementation charges a unit cost per step.

        Parameters
        ----------
        c : float
            The current cost for the path.
        _ : object
            Ignored by this default implementation.

        Returns
        -------
        float :
            The updated cost, i.e. ``c + 1``.

        """
        return c + 1