
"""
========================================
Planning tools (:mod:`mlpy.planners`)
========================================


.. automodule:: mlpy.planners.explorers
   :noindex:


.. currentmodule:: mlpy.planners

Planners
========

.. autosummary::
   :toctree: generated/
   :nosignatures:

   IPlanner


Discrete planners
-----------------

.. autosummary::
   :toctree: generated/
   :nosignatures:

   ~discrete.ValueIteration

"""
from __future__ import division, print_function, absolute_import

from abc import ABCMeta, abstractmethod
from ..modules import UniqueModule
from ..mdp.stateaction import Action

__all__ = ['explorers', 'discrete']


class IPlanner(UniqueModule):
    """The planner interface class.

    Parameters
    ----------
    explorer : Explorer
        The exploration strategy to employ. Available explorers are:

        :class:`.EGreedyExplorer`
            With :math:`\\epsilon` probability, a random action is chosen;
            otherwise the action resulting in the highest q-value is
            selected.

        :class:`.SoftmaxExplorer`
            The softmax explorer varies the action probability as a graded
            function of estimated value. The greedy action is still given
            the highest selection probability, but all the others are
            ranked and weighted according to their value estimates.

    """
    __metaclass__ = ABCMeta

    def __init__(self, explorer=None):
        """Initialization of the planner class."""
        super(IPlanner, self).__init__()

        self._history = {}
        """:type: dict[State, list[str]]"""
        self._current = -1

        self._explorer = explorer
        """:type: Explorer"""

    def __getstate__(self):
        return {'_history': self._history}

    def __setstate__(self, d):
        super(IPlanner, self).__setstate__(d)

        setattr(self, '_history', d['_history'])
        self._current = (len(self._history[next(iter(self._history))]) - 1
                         if len(self._history) > 0 else -1)

    def activate_exploration(self):
        """Turn the explorer on."""
        if self._explorer is not None:
            self._explorer.activate()

    def deactivate_exploration(self):
        """Turn the explorer off."""
        if self._explorer is not None:
            self._explorer.deactivate()

    @abstractmethod
    def get_best_action(self, state):
        """Choose the best next action for the agent to take.

        Parameters
        ----------
        state : State
            The state for which to choose the action.

        Returns
        -------
        Action :
            The best action.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    @abstractmethod
    def plan(self):
        """Plan for the optimal policy.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    def get_next_action(self, state, use_policy=False):
        """Returns the optimal action for a state according to the current policy.

        Parameters
        ----------
        state : State
            The state for which to choose the next action.
        use_policy : bool, optional
            When using a policy, the next action is chosen according to the
            current policy; otherwise the best action is selected. Default
            is False.

        Returns
        -------
        Action :
            The next action.

        """
        if not use_policy:
            action = self.get_best_action(state)
        else:
            if not self._history:
                self.create_policy()
            action = self._history[state][self._current]
        return action

    def create_policy(self, func=None):
        """Creates a policy (i.e., a state-action association).

        Parameters
        ----------
        func : callable, optional
            A callback function for mixing policies.

        """
        policy = self._create_policy(func)

        states = set(self._history).union(policy)
        # Length of the action history recorded so far; states first seen
        # in this policy are padded to this length with no-op actions, so
        # that every state's action list stays the same length.
        n = len(next(iter(self._history.values()))) if self._history else 0

        self._history = dict(
            (s, (self._history.get(s, []) if self._history.get(s) is not None
                 else [Action.get_noop_action()] * n) + policy.get(s, []))
            for s in states)
        self._current += 1

    def visualize(self):
        """Visualize the planning data.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    def _create_policy(self, func=None):
        raise NotImplementedError
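

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library): a minimal
# concrete subclass showing how the IPlanner interface is meant to be filled
# in. The q-table, the plain-string states and actions, and the class itself
# are hypothetical stand-ins; a real planner such as discrete.ValueIteration
# derives its values from an MDP model and works with State/Action instances.
# ---------------------------------------------------------------------------
class _TablePlannerSketch(IPlanner):
    """Toy planner backed by a precomputed state -> [(action, q-value)] table."""

    def __init__(self, qtable, explorer=None):
        super(_TablePlannerSketch, self).__init__(explorer)
        self._qtable = qtable

    def plan(self):
        # Nothing to compute here: the q-table is assumed to be given.
        pass

    def get_best_action(self, state):
        # Greedy choice: the action with the highest q-value. A full
        # implementation would consult self._explorer here so that, e.g.,
        # an EGreedyExplorer can substitute a random action with
        # probability epsilon.
        return max(self._qtable[state], key=lambda pair: pair[1])[0]

    def _create_policy(self, func=None):
        # One greedy action per state; create_policy() appends this slice
        # to the history and pads previously unseen states with no-ops.
        return dict((s, [self.get_best_action(s)]) for s in self._qtable)


if __name__ == '__main__':
    # Hypothetical walk-through of the sketch above.
    planner = _TablePlannerSketch({'s0': [('a0', 0.1), ('a1', 0.7)],
                                   's1': [('a0', 0.4), ('a1', 0.2)]})
    planner.create_policy()
    print(planner.get_next_action('s0', use_policy=True))   # -> 'a1'
    print(planner.get_next_action('s1'))                    # -> 'a0'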