
"""
========================================
Planning tools (:mod:`mlpy.planners`)
========================================


.. automodule:: mlpy.planners.explorers
   :noindex:


.. currentmodule:: mlpy.planners

Planners
========

.. autosummary::
   :toctree: generated/
   :nosignatures:

   IPlanner


Discrete planners
-----------------

.. autosummary::
   :toctree: generated/
   :nosignatures:

   ~discrete.ValueIteration

"""
from __future__ import division, print_function, absolute_import

from abc import ABCMeta, abstractmethod
from ..modules import UniqueModule
from ..mdp.stateaction import Action

__all__ = ['explorers', 'discrete']


class IPlanner(UniqueModule):
    """The planner interface class.

    Parameters
    ----------
    explorer : Explorer
        The exploration strategy to employ. Available explorers are:

        :class:`.EGreedyExplorer`
            With :math:`\\epsilon` probability, a random action is chosen;
            otherwise the action resulting in the highest q-value is
            selected.

        :class:`.SoftmaxExplorer`
            The softmax explorer varies the action probability as a graded
            function of estimated value. The greedy action is still given
            the highest selection probability, but all the others are
            ranked and weighted according to their value estimates.

    """
    __metaclass__ = ABCMeta

    def __init__(self, explorer=None):
        """Initialization of the planner class."""
        super(IPlanner, self).__init__()

        self._history = {}
        """:type: dict[State, list[str]]"""
        self._current = -1

        self._explorer = explorer
        """:type: Explorer"""

    def __getstate__(self):
        return {'_history': self._history}

    def __setstate__(self, d):
        super(IPlanner, self).__setstate__(d)

        setattr(self, '_history', d['_history'])
        self._current = (len(self._history[next(iter(self._history))]) - 1
                         if len(self._history) > 0 else -1)

    def activate_exploration(self):
        """Turn the explorer on."""
        if self._explorer is not None:
            self._explorer.activate()

    def deactivate_exploration(self):
        """Turn the explorer off."""
        if self._explorer is not None:
            self._explorer.deactivate()

    @abstractmethod
    def get_best_action(self, state):
        """Choose the best next action for the agent to take.

        Parameters
        ----------
        state : State
            The state for which to choose the action.

        Returns
        -------
        Action :
            The best action.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    @abstractmethod
    def plan(self):
        """Plan for the optimal policy.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    def get_next_action(self, state, use_policy=False):
        """Returns the optimal action for a state according to the current policy.

        Parameters
        ----------
        state : State
            The state for which to choose the next action.
        use_policy : bool, optional
            When using a policy, the next action is chosen according to the
            current policy; otherwise the best action is selected. Default
            is False.

        Returns
        -------
        Action :
            The next action.

        """
        if not use_policy:
            action = self.get_best_action(state)
        else:
            if not self._history:
                self.create_policy()
            action = self._history[state][self._current]
        return action

    def create_policy(self, func=None):
        """Creates a policy (i.e., a state-action association).

        Parameters
        ----------
        func : callable, optional
            A callback function for mixing policies.

        """
        policy = self._create_policy(func)

        states = set(self._history).union(policy)
        # Length of the action history recorded so far; states first seen
        # in this policy are padded to this length with no-op actions, so
        # that every state's action list stays the same length.
        n = len(next(iter(self._history.values()))) if self._history else 0

        self._history = dict(
            (s, (self._history.get(s, []) if self._history.get(s) is not None
                 else [Action.get_noop_action()] * n) + policy.get(s, []))
            for s in states)
        self._current += 1

    def visualize(self):
        """Visualize the planning data.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    def _create_policy(self, func=None):
        raise NotImplementedError
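

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library): a minimal
# concrete subclass showing how the IPlanner interface is meant to be filled
# in. The q-table, the plain-string states and actions, and the class itself
# are hypothetical stand-ins; a real planner such as discrete.ValueIteration
# derives its values from an MDP model and works with State/Action instances.
# ---------------------------------------------------------------------------
class _TablePlannerSketch(IPlanner):
    """Toy planner backed by a precomputed state -> [(action, q-value)] table."""

    def __init__(self, qtable, explorer=None):
        super(_TablePlannerSketch, self).__init__(explorer)
        self._qtable = qtable

    def plan(self):
        # Nothing to compute here: the q-table is assumed to be given.
        pass

    def get_best_action(self, state):
        # Greedy choice: the action with the highest q-value. A full
        # implementation would consult self._explorer here so that, e.g.,
        # an EGreedyExplorer can substitute a random action with
        # probability epsilon.
        return max(self._qtable[state], key=lambda pair: pair[1])[0]

    def _create_policy(self, func=None):
        # One greedy action per state; create_policy() appends this slice
        # to the history and pads previously unseen states with no-ops.
        return dict((s, [self.get_best_action(s)]) for s in self._qtable)


if __name__ == '__main__':
    # Hypothetical walk-through of the sketch above.
    planner = _TablePlannerSketch({'s0': [('a0', 0.1), ('a1', 0.7)],
                                   's1': [('a0', 0.4), ('a1', 0.2)]})
    planner.create_policy()
    print(planner.get_next_action('s0', use_policy=True))   # -> 'a1'
    print(planner.get_next_action('s1'))                    # -> 'a0'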