Coverage for local/lib/python2.7/site-packages/sage/stats/hmm/distributions.pyx : 78%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
|
""" Distributions used in implementing Hidden Markov Models
These distribution classes are designed specifically for HMM's and not for general use in statistics. For example, they have fixed or non-fixed status, which only make sense relative to being used in a hidden Markov model.
AUTHOR:
- William Stein, 2010-03 """
############################################################################# # Copyright (C) 2010 William Stein <wstein@gmail.com> # Distributed under the terms of the GNU General Public License (GPL) # The full text of the GPL is available at: # http://www.gnu.org/licenses/ ############################################################################# from __future__ import absolute_import
from cpython.object cimport PyObject_RichCompare
cdef extern from "math.h": double exp(double) double log(double) double sqrt(double)
from sage.misc.randstate cimport current_randstate, randstate from sage.finance.time_series cimport TimeSeries
cdef double random_normal(double mean, double std, randstate rstate): """ Return a floating point number chosen from the normal distribution with given mean and standard deviation, using the given randstate. The computation uses the box muller algorithm.
INPUT:
- mean -- float; the mean - std -- float; the standard deviation - rstate -- randstate; the random number generator state
OUTPUT:
- double """ # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c # This the box muller algorithm. # Client code can get the current random state from: # cdef randstate rstate = current_randstate() cdef double x1, x2, w, y1, y2
# Abstract base class for distributions used for hidden Markov models.
cdef class Distribution: """ A distribution. """ def sample(self, n=None): """ Return either a single sample (the default) or n samples from this probability distribution.
INPUT:
- n -- None or a positive integer
OUTPUT:
- a single sample if n is 1; otherwise many samples
EXAMPLES:
This method must be defined in a derived class::
sage: import sage.stats.hmm.distributions sage: sage.stats.hmm.distributions.Distribution().sample() Traceback (most recent call last): ... NotImplementedError """
def prob(self, x): """ The probability density function evaluated at x.
INPUT:
- x -- object
OUTPUT:
- float
EXAMPLES:
This method must be defined in a derived class::
sage: import sage.stats.hmm.distributions sage: sage.stats.hmm.distributions.Distribution().prob(0) Traceback (most recent call last): ... NotImplementedError """
def plot(self, *args, **kwds): """ Return a plot of the probability density function.
INPUT:
- args and kwds, passed to the Sage plot function
OUTPUT:
- a Graphics object
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.plot(-10,30) Graphics object consisting of 1 graphics primitive """
cdef class GaussianMixtureDistribution(Distribution): """ A probability distribution defined by taking a weighted linear combination of Gaussian distributions.
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.3,1,2),(.7,-1,1)]); P 0.3*N(1.0,2.0) + 0.7*N(-1.0,1.0) sage: P[0] (0.3, 1.0, 2.0) sage: P.is_fixed() False sage: P.fix(1) sage: P.is_fixed(0) False sage: P.is_fixed(1) True sage: P.unfix(1) sage: P.is_fixed(1) False """ def __init__(self, B, eps=1e-8, bint normalize=True): """ INPUT:
- `B` -- a list of triples `(c_i, mean_i, std_i)`, where the `c_i` and `std_i` are positive and the sum of the `c_i` is `1`.
- eps -- positive real number; any standard deviation in B less than eps is replaced by eps.
- normalize -- if True, ensure that the c_i are nonnegative
EXAMPLES::
sage: hmm.GaussianMixtureDistribution([(.3,1,2),(.7,-1,1)]) 0.3*N(1.0,2.0) + 0.7*N(-1.0,1.0) sage: hmm.GaussianMixtureDistribution([(1,-1,0)], eps=1e-3) 1.0*N(-1.0,0.001) """ cdef double s s = 1.0/len(B) for a in B: a[0] = s else:
def __getitem__(self, Py_ssize_t i): """ Returns triple (coefficient, mu, std).
INPUT:
- i -- integer
OUTPUT:
- triple of floats
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P[0] (0.2, -10.0, 0.5) sage: P[2] (0.2, 20.0, 0.5) sage: [-1] [-1] sage: P[-1] (0.2, 20.0, 0.5) sage: P[3] Traceback (most recent call last): ... IndexError: index out of range sage: P[-4] Traceback (most recent call last): ... IndexError: index out of range """
def __reduce__(self): """ Used in pickling.
EXAMPLES::
sage: G = hmm.GaussianMixtureDistribution([(.1,1,2), (.9,0,1)]) sage: loads(dumps(G)) == G True """
def __richcmp__(self, other, op): """ EXAMPLES::
sage: G = hmm.GaussianMixtureDistribution([(.1,1,2), (.9,0,1)]) sage: H = hmm.GaussianMixtureDistribution([(.3,1,2), (.7,1,5)]) sage: G < H True sage: H > G True sage: G == H False sage: G == G True """ return NotImplemented
def __len__(self): """ Return the number of components of this GaussianMixtureDistribution.
EXAMPLES::
sage: len(hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)])) 3 """
cpdef is_fixed(self, i=None): """ Return whether or not this GaussianMixtureDistribution is fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only return whether the i-th component is fixed
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.is_fixed() False sage: P.is_fixed(0) False sage: P.fix(0); P.is_fixed() False sage: P.is_fixed(0) True sage: P.fix(); P.is_fixed() True """ else:
def fix(self, i=None): """ Set that this GaussianMixtureDistribution (or its ith component) is fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only fix the i-th component
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.fix(1); P.is_fixed() False sage: P.is_fixed(1) True sage: P.fix(); P.is_fixed() True """ cdef int j else:
def unfix(self, i=None): """ Set that this GaussianMixtureDistribution (or its ith component) is not fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only fix the i-th component
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.fix(1); P.is_fixed(1) True sage: P.unfix(1); P.is_fixed(1) False sage: P.fix(); P.is_fixed() True sage: P.unfix(); P.is_fixed() False
""" cdef int j else:
def __repr__(self): """ Return string representation of this mixed Gaussian distribution.
EXAMPLES::
sage: hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]).__repr__() '0.2*N(-10.0,0.5) + 0.6*N(1.0,1.0) + 0.2*N(20.0,0.5)' """
def sample(self, n=None): """ Return a single sample from this distribution (by default), or if n>1, return a TimeSeries of samples.
INPUT:
- n -- integer or None (default: None)
OUTPUT:
- float if n is None (default); otherwise a TimeSeries
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.sample() 19.65824361087513 sage: P.sample(1) [-10.4683] sage: P.sample(5) [-0.1688, -10.3479, 1.6812, 20.1083, -9.9801] sage: P.sample(0) [] sage: P.sample(-3) Traceback (most recent call last): ... ValueError: n must be nonnegative """ cdef Py_ssize_t i cdef TimeSeries T else:
cdef double _sample(self, randstate rstate): """ Used internally to compute a sample from this distribution quickly.
INPUT:
- rstate -- a randstate object
OUTPUT:
- double """ cdef double accum, r cdef int n
# See the remark in hmm.pyx about using GSL to remove this # silly way of sampling from a discrete distribution. raise RuntimeError("invalid probability distribution")
cpdef double prob(self, double x): """ Return the probability of x.
Since this is a continuous distribution, this is defined to be the limit of the p's such that the probability of [x,x+h] is p*h.
INPUT:
- x -- float
OUTPUT:
- float
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.prob(.5) 0.21123919605857971 sage: P.prob(-100) 0.0 sage: P.prob(20) 0.1595769121605731 """ # The tricky-looking code below is a fast version of this: # return sum([c/(sqrt(2*math.pi)*std) * \ # exp(-(x-mean)*(x-mean)/(2*std*std)) for # c, mean, std in self.B]) cdef int n
cpdef double prob_m(self, double x, int m): """ Return the probability of x using just the m-th summand.
INPUT:
- x -- float - m -- integer
OUTPUT:
- float
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.prob_m(.5, 0) 2.7608117680508...e-97 sage: P.prob_m(.5, 1) 0.21123919605857971 sage: P.prob_m(.5, 2) 0.0 """ cdef double s, mu raise IndexError("index out of range")
TimeSeries param, IntList fixed): """ Used in unpickling GaussianMixtureDistribution's.
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: loads(dumps(P)) == P # indirect doctest True """ |