Source code for mlpy.auxiliary.array

"""
.. module:: mlpy.auxiliary.array
   :platform: Unix, Windows
   :synopsis: Numpy array utility functions.

.. moduleauthor:: Astrid Jackson <ajackson@eecs.ucf.edu>
"""
from __future__ import division, print_function, absolute_import

from itertools import product
import numpy as np


def accum(accmap, a, func=None, size=None, fill_value=0, dtype=None):
    """An accumulation function similar to Matlab's `accumarray` function.

    Parameters
    ----------
    accmap : array_like
        This is the "accumulation map". It maps input (i.e. indices into
        `a`) to their destination in the output array. The first `a.ndim`
        dimensions of `accmap` must be the same as `a.shape`. That is,
        `accmap.shape[:a.ndim]` must equal `a.shape`. For example, if `a`
        has shape (15,4), then `accmap.shape[:2]` must equal (15,4). In this
        case `accmap[i,j]` gives the index into the output array where
        element (i,j) of `a` is to be accumulated. If the output is, say,
        2D, then `accmap` must have shape (15,4,2). The values in the last
        dimension give indices into the output array. If the output is 1D,
        then the shape of `accmap` can be either (15,4) or (15,4,1).
    a : array_like or float or int
        The input data to be accumulated. If `a` is a scalar (it has no
        `__len__`), the function switches to counting mode: `accmap` must be
        2D, every row of `accmap` is an index into the output, and the
        output holds the number of times each distinct row occurs. The
        value of `a`, `func` and `fill_value` are not used in that mode;
        positions that never occur are 0.
    func : callable or None
        The accumulation function. The function will be passed a list of
        values from `a` to be accumulated. If None, numpy.sum is assumed.
    size : array_like or tuple
        The size of the output array. If None, the size will be determined
        from `accmap`.
    fill_value : scalar
        The default value for elements of the output array that receive no
        input.
    dtype : dtype
        The data type of the output array. If None, the data type of `a` is
        used (``numpy.float64`` when `a` is a scalar).

    Returns
    -------
    out : ndarray
        The accumulated results. The shape of `out` is `size` if `size` is
        given. Otherwise the shape is determined by the (lexicographically)
        largest indices of the output found in `accmap`.

    Raises
    ------
    ValueError
        If `a` is an array and the leading dimensions of `accmap` do not
        match `a.shape`.

    Examples
    --------
    >>> from numpy import array, prod, float64
    >>> a = array([[1,2,3],[4,-1,6],[-1,8,9]])
    >>> a
    array([[ 1,  2,  3],
           [ 4, -1,  6],
           [-1,  8,  9]])

    Sum the diagonals:

    >>> accmap = array([[0,1,2],[2,0,1],[1,2,0]])
    >>> accum(accmap, a)
    array([ 9,  7, 15])

    A 2D output, from sub-arrays with shapes and positions like this:

    | [ (2,2) (2,1)]
    | [ (1,2) (1,1)]

    >>> accmap = array([
    ...     [[0,0],[0,0],[0,1]],
    ...     [[0,0],[0,0],[0,1]],
    ...     [[1,0],[1,0],[1,1]],
    ... ])

    Accumulate using a product:

    >>> accum(accmap, a, func=prod, dtype=float64)
    array([[ -8.,  18.],
           [ -8.,   9.]])

    Same accmap, but create an array of lists of values:

    >>> accum(accmap, a, func=lambda x: x, dtype='O')
    array([[[1, 2, 4, -1], [3, 6]],
           [[-1, 8], [9]]], dtype=object)

    .. note::
        Adapted from

        | Project: Code from `SciPy Cookbook <http://wiki.scipy.org/Cookbook/AccumarrayLike>`_.
        | Code author: Warren Weckesser
        | License: `CC-Wiki <http://creativecommons.org/licenses/by-sa/3.0/>`_

    """
    # Accept any array_like (lists, tuples, ...) while leaving ndarray
    # subclasses untouched.
    accmap = np.asanyarray(accmap)
    a_is_array = hasattr(a, "__len__")
    if a_is_array:
        a = np.asanyarray(a)

    # Check for bad arguments and handle the defaults.
    if a_is_array and accmap.shape[:a.ndim] != a.shape:
        raise ValueError("The initial dimensions of accmap must be the same as a.shape")
    if func is None:
        func = np.sum
    if dtype is None:
        dtype = a.dtype if a_is_array else np.float64

    if not a_is_array:
        # Counting mode: tally how often each distinct index row occurs.
        n_cols = accmap.shape[1]
        if size is None:
            # NOTE(review): the default is a square (n_cols, n_cols) array,
            # which only fits two-column maps whose indices are below
            # n_cols; kept as-is for backward compatibility -- pass `size`
            # explicitly for anything else.
            size = (n_cols, n_cols)
        size = np.atleast_1d(size)

        out = np.zeros(size, dtype=dtype)
        rows, counts = np.unique(accmap, axis=0, return_counts=True)
        for row, count in zip(rows, counts):
            out[tuple(int(v) for v in row)] = count
        return out

    # A map with exactly the shape of `a` describes a 1D output; give it the
    # trailing index axis the general code below expects.
    if accmap.shape == a.shape:
        accmap = np.expand_dims(accmap, -1)

    if size is None:
        data_axes = tuple(range(a.ndim))
        size = 1 + np.squeeze(np.apply_over_axes(np.max, accmap, axes=data_axes))
    size = np.atleast_1d(size)

    # Create an array of python lists of values, one list per output cell.
    vals = np.empty(size, dtype='O')
    for cell in product(*[range(k) for k in size]):
        vals[cell] = []
    for pos in product(*[range(k) for k in a.shape]):
        vals[tuple(accmap[pos])].append(a[pos])

    # Create the output array by reducing every non-empty list with `func`.
    out = np.empty(size, dtype=dtype)
    for cell in product(*[range(k) for k in size]):
        group = vals[cell]
        out[cell] = func(group) if group else fill_value

    return out
def normalize(a, axis=None, return_scale=False):
    """Normalize the input array to sum to `1`.

    A machine epsilon is added to every element before normalizing so that
    all-zero input does not lead to a division by zero.

    Parameters
    ----------
    a : array_like, shape (`nsamples`, `nfeatures`)
        Non-normalized input data array.
    axis : int or None
        Dimension along which normalization is performed. If None, the
        array is normalized by the sum over all of its elements.
    return_scale : bool
        If True, also return the sums the array was divided by.

    Returns
    -------
    normalized : ndarray, shape (`nsamples`, `nfeatures`)
        An array with values normalized (summing to 1) along the prescribed
        axis.
    scale : ndarray or scalar
        Only returned if `return_scale` is True. The sums used as divisor.
        For a multi-dimensional `a` and a non-zero `axis` the summed axis is
        kept with length 1; for ``axis=0`` it is dropped and for
        ``axis=None`` the scale is a scalar.

    Examples
    --------
    >>> import numpy as np
    >>> normalize(np.array([[1., 3.], [1., 1.]]), axis=1)
    array([[0.25, 0.75],
           [0.5 , 0.5 ]])

    .. attention::
        If `a` is a floating point array, the epsilon is added to it
        inplace. The normalized values are returned as a new array.

    """
    a = np.asanyarray(a)
    if not np.issubdtype(a.dtype, np.inexact):
        # Integer/boolean data (or a list of them) cannot take the inplace
        # epsilon shift below; work on a float copy instead of raising.
        a = a.astype(float)
    a += np.finfo(float).eps

    asum = a.sum(axis)
    # `axis is not None` instead of plain truthiness, so that axis 0 gets
    # the zero guard as well.
    if axis is not None and a.ndim > 1:
        # Make sure we don't divide by zero.
        asum[asum == 0] = 1
        if axis != 0:
            # Restore the summed axis so the division broadcasts along it.
            # For axis 0 the sums already broadcast against `a`, and the
            # scale keeps the shape callers have always received.
            asum = np.expand_dims(asum, axis)

    a = np.true_divide(a, asum)
    # TODO: should return nothing, since the operation is inplace.

    if return_scale:
        return a, asum
    return a
def nunique(x, axis=None):
    """Efficiently count the unique elements of `x` along the given axis.

    The data is sorted along `axis` and every position whose value is
    larger than its predecessor marks a new unique element.

    Parameters
    ----------
    x : array_like
        The array for which to count the unique elements.
    axis : int
        Dimension along which to count the unique elements. If None, the
        first axis is used.

    Returns
    -------
    int or array_like :
        The number of unique elements along the given axis. `0` where `x`
        has no elements along that axis.

    Examples
    --------
    >>> import numpy as np
    >>> nunique(np.array([1, 2, 2, 3, 1]))
    3
    >>> nunique(np.array([[1, 2, 2], [3, 3, 3]]), axis=1)
    array([2, 1])

    .. note::
        Ported from Matlab:

        | Project: `Probabilistic Modeling Toolkit for Matlab/Octave <https://github.com/probml/pmtk3>`_.
        | Copyright (2010) Kevin Murphy and Matt Dunham
        | License: `MIT <https://github.com/probml/pmtk3/blob/5fefd068a2e84ae508684d3e4750bd72a4164ba0/license.txt>`_

    """
    axis = 0 if axis is None else axis
    ordered = np.sort(x, axis=axis)

    # Compare neighbours directly rather than testing `diff > 0`: the
    # subtraction wraps around for narrow or extreme integer values and
    # would then miss a genuine change.
    later = [slice(None)] * ordered.ndim
    earlier = [slice(None)] * ordered.ndim
    later[axis] = slice(1, None)
    earlier[axis] = slice(None, -1)
    increases = ordered[tuple(later)] > ordered[tuple(earlier)]

    # The first element along the axis is always unique -- unless there is
    # no element at all.
    first = 1 if ordered.shape[axis] else 0
    return np.sum(increases, axis=axis) + first