# Source code for mlpy.learners

"""
=============================================
Learning algorithms (:mod:`mlpy.learners`)
=============================================

.. currentmodule:: mlpy.learners


.. autosummary::
   :toctree: generated/
   :nosignatures:

   LearnerFactory
   ILearner


.. automodule:: mlpy.learners.online
   :noindex:


.. automodule:: mlpy.learners.offline
   :noindex:

"""
from __future__ import division, print_function, absolute_import

from abc import abstractmethod
from ..modules.patterns import RegistryInterface
from ..modules import UniqueModule


class LearnerFactory(object):
    """The learner factory.

    An instance of a learner can be created by passing the learner type.

    Examples
    --------
    >>> from mlpy.learners import LearnerFactory
    >>> q0 = LearnerFactory.create('qlearner')

    This creates a :class:`.QLearner` instance with default parameters.

    >>> q1 = LearnerFactory.create('qlearner', max_steps=10)

    This creates a :class:`.QLearner` instance with max_steps set to 10.

    """

    @staticmethod
    def create(_type, *args, **kwargs):
        """Create a learner of the given type.

        The learner class is looked up (case-insensitively) in the
        :class:`ILearner` registry. If the keyword `progress` is given and
        evaluates to true, the factory first attempts to recover the
        learner from the state saved in the file named by the keyword
        `filename`. If loading fails with an ``IOError``, a new learner is
        created instead. The `progress` keyword is consumed by the factory
        and is never forwarded to the learner class.

        Parameters
        ----------
        _type : str
            The learner type. Valid learner types:

                qlearner
                    Performs q-learning, a reinforcement learning variant.
                    A :class:`.QLearner` module is created.

                rldtlearner
                    The learner performs reinforcement learning with
                    decision trees (RLDT), a method introduced by Hester,
                    Quinlan, and Stone which builds a generalized model for
                    the transitions and rewards of the environment. A
                    :class:`.RLDTLearner` module is created.

                apprenticeshiplearner
                    The learner performs apprenticeship learning via
                    inverse reinforcement learning, a method introduced by
                    Abbeel and Ng which strives to imitate the
                    demonstrations given by an expert. A
                    :class:`.ApprenticeshipLearner` module is created.

                incrapprenticeshiplearner
                    The learner incrementally performs apprenticeship
                    learning via inverse reinforcement learning. Inverse
                    reinforcement learning assumes knowledge of the
                    underlying model. However, this is not always feasible.
                    The incremental apprenticeship learner updates its
                    model after every iteration by executing the current
                    policy. A :class:`.IncrApprenticeshipLearner` module is
                    created.

        args : tuple, optional
            Positional arguments passed to the class of the given type for
            initialization.
        kwargs : dict, optional
            Non-positional arguments passed to the class of the given type
            for initialization.

        Returns
        -------
        ILearner :
            A learner instance of the given type.

        Raises
        ------
        KeyError
            If `_type` is not a registered learner type.
        SystemExit
            If a ``KeyError`` occurs while recovering the learner from
            file (for example, `progress` was requested without a
            `filename` keyword). The traceback is printed first.

        """
        try:
            # noinspection PyUnresolvedReferences
            learner = ILearner.registry[_type.lower()]
        except KeyError:
            # Same exception type as before, but with a message that names
            # the offending value instead of only the lowered key.
            raise KeyError("Unknown learner type: {0!r}".format(_type))

        # `progress` only steers the factory; remove it so it is not passed
        # on to the learner's constructor.
        if kwargs.pop('progress', False):
            try:
                return learner.load(kwargs['filename'])
            except IOError:
                # The saved state could not be read; fall through and
                # build a fresh learner below.
                pass
            except KeyError:
                # NOTE(review): terminating the process from library code
                # is harsh; kept as-is for backward compatibility.
                import sys
                import traceback
                traceback.print_exc()
                sys.exit(1)

        return learner(*args, **kwargs)
class ILearner(UniqueModule):
    """The learner interface.

    Common base of both online and offline learners.

    Parameters
    ----------
    filename : str, optional
        The name of the file the learner state is saved to on
        :meth:`reset`. Default is None.
        NOTE(review): :meth:`reset` hands this value to ``save``
        unconditionally; whether None suppresses saving depends on
        ``save`` -- confirm.

    """
    __metaclass__ = RegistryInterface

    @property
    def type(self):
        """The type of the learner (i.e., `online` or `offline`).

        An online learner performs the learning during the episode or
        iteration, whereas an offline learner defers the learning step
        until the end of the episode or iteration.

        This property must be overwritten by the deriving class.

        Returns
        -------
        str :
            The type. Values can be either `online` or `offline`.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this property.

        """
        raise NotImplementedError

    def __init__(self, filename=None):
        """Learner initialization."""
        super(ILearner, self).__init__()
        self._filename = filename

    # noinspection PyMethodMayBeStatic
    def __getstate__(self):
        # The interface itself contributes no state when pickled.
        return dict()

    def __setstate__(self, d):
        # Restoring state is delegated entirely to the parent class.
        super(ILearner, self).__setstate__(d)

    # noinspection PyUnusedLocal
    def reset(self, t, **kwargs):
        """Reset the learner.

        Called before the start of a new episode or iteration. This base
        implementation saves the learner state to the configured file.

        Parameters
        ----------
        t : float
            The current time (sec). Unused by this base implementation.
        kwargs : dict, optional
            Non-positional parameters. Unused by this base implementation.

        """
        self.save(self._filename)

    def execute(self, experience):
        """Execute learning specific updates.

        Learning specific updates are performed, e.g. model updates.

        Parameters
        ----------
        experience : Experience
            The actor's current experience consisting of previous state,
            the action performed in that state, the current state, and
            the reward awarded.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    @abstractmethod
    def learn(self):
        """Learn a policy from the experience.

        Perform the learning step to derive a new policy taking the
        latest experience into account.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError

    def choose_action(self, state):
        """Choose the next action.

        The next action is chosen according to the current policy and
        the selected exploration strategy.

        Parameters
        ----------
        state : State
            The current state.

        Returns
        -------
        Action :
            The chosen action.

        Raises
        ------
        NotImplementedError
            If the child class does not implement this function.

        """
        raise NotImplementedError
# NOTE(review): these wildcard imports sit below the class definitions,
# presumably so the submodules can import ``ILearner`` from this package
# without a circular import -- confirm before moving them.
from .online import *
from .offline import *

# Export every name visible at this point, except private names (leading
# underscore) and names ending in ``cython``.
__all__ = [s for s in dir()
           if not s.startswith('_') and not s.endswith('cython')]