Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
""" Distributions used in implementing Hidden Markov Models
These distribution classes are designed specifically for HMM's and not for general use in statistics. For example, they have fixed or non-fixed status, which only make sense relative to being used in a hidden Markov model.
AUTHOR:
- William Stein, 2010-03 """
############################################################################# # Copyright (C) 2010 William Stein <wstein@gmail.com> # Distributed under the terms of the GNU General Public License (GPL) # The full text of the GPL is available at: # http://www.gnu.org/licenses/ ############################################################################# from __future__ import absolute_import
from cpython.object cimport PyObject_RichCompare
cdef extern from "math.h": double exp(double) double log(double) double sqrt(double)
from sage.misc.randstate cimport current_randstate, randstate from sage.finance.time_series cimport TimeSeries
cdef double random_normal(double mean, double std, randstate rstate): """ Return a floating point number chosen from the normal distribution with given mean and standard deviation, using the given randstate. The computation uses the box muller algorithm.
INPUT:
- mean -- float; the mean - std -- float; the standard deviation - rstate -- randstate; the random number generator state
OUTPUT:
- double """ # Ported from http://users.tkk.fi/~nbeijar/soft/terrain/source_o2/boxmuller.c # This the box muller algorithm. # Client code can get the current random state from: # cdef randstate rstate = current_randstate() cdef double x1, x2, w, y1, y2
# Abstract base class for distributions used for hidden Markov models.
cdef class Distribution: """ A distribution. """ def sample(self, n=None): """ Return either a single sample (the default) or n samples from this probability distribution.
INPUT:
- n -- None or a positive integer
OUTPUT:
- a single sample if n is 1; otherwise many samples
EXAMPLES:
This method must be defined in a derived class::
sage: import sage.stats.hmm.distributions sage: sage.stats.hmm.distributions.Distribution().sample() Traceback (most recent call last): ... NotImplementedError """
def prob(self, x): """ The probability density function evaluated at x.
INPUT:
- x -- object
OUTPUT:
- float
EXAMPLES:
This method must be defined in a derived class::
sage: import sage.stats.hmm.distributions sage: sage.stats.hmm.distributions.Distribution().prob(0) Traceback (most recent call last): ... NotImplementedError """
def plot(self, *args, **kwds): """ Return a plot of the probability density function.
INPUT:
- args and kwds, passed to the Sage plot function
OUTPUT:
- a Graphics object
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.plot(-10,30) Graphics object consisting of 1 graphics primitive """
cdef class GaussianMixtureDistribution(Distribution): """ A probability distribution defined by taking a weighted linear combination of Gaussian distributions.
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.3,1,2),(.7,-1,1)]); P 0.3*N(1.0,2.0) + 0.7*N(-1.0,1.0) sage: P[0] (0.3, 1.0, 2.0) sage: P.is_fixed() False sage: P.fix(1) sage: P.is_fixed(0) False sage: P.is_fixed(1) True sage: P.unfix(1) sage: P.is_fixed(1) False """ def __init__(self, B, eps=1e-8, bint normalize=True): """ INPUT:
- `B` -- a list of triples `(c_i, mean_i, std_i)`, where the `c_i` and `std_i` are positive and the sum of the `c_i` is `1`.
- eps -- positive real number; any standard deviation in B less than eps is replaced by eps.
- normalize -- if True, ensure that the c_i are nonnegative
EXAMPLES::
sage: hmm.GaussianMixtureDistribution([(.3,1,2),(.7,-1,1)]) 0.3*N(1.0,2.0) + 0.7*N(-1.0,1.0) sage: hmm.GaussianMixtureDistribution([(1,-1,0)], eps=1e-3) 1.0*N(-1.0,0.001) """ cdef double s s = 1.0/len(B) for a in B: a[0] = s else:
def __getitem__(self, Py_ssize_t i): """ Returns triple (coefficient, mu, std).
INPUT:
- i -- integer
OUTPUT:
- triple of floats
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P[0] (0.2, -10.0, 0.5) sage: P[2] (0.2, 20.0, 0.5) sage: [-1] [-1] sage: P[-1] (0.2, 20.0, 0.5) sage: P[3] Traceback (most recent call last): ... IndexError: index out of range sage: P[-4] Traceback (most recent call last): ... IndexError: index out of range """
def __reduce__(self): """ Used in pickling.
EXAMPLES::
sage: G = hmm.GaussianMixtureDistribution([(.1,1,2), (.9,0,1)]) sage: loads(dumps(G)) == G True """
def __richcmp__(self, other, op): """ EXAMPLES::
sage: G = hmm.GaussianMixtureDistribution([(.1,1,2), (.9,0,1)]) sage: H = hmm.GaussianMixtureDistribution([(.3,1,2), (.7,1,5)]) sage: G < H True sage: H > G True sage: G == H False sage: G == G True """ return NotImplemented
def __len__(self): """ Return the number of components of this GaussianMixtureDistribution.
EXAMPLES::
sage: len(hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)])) 3 """
cpdef is_fixed(self, i=None): """ Return whether or not this GaussianMixtureDistribution is fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only return whether the i-th component is fixed
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.is_fixed() False sage: P.is_fixed(0) False sage: P.fix(0); P.is_fixed() False sage: P.is_fixed(0) True sage: P.fix(); P.is_fixed() True """ else:
def fix(self, i=None): """ Set that this GaussianMixtureDistribution (or its ith component) is fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only fix the i-th component
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.fix(1); P.is_fixed() False sage: P.is_fixed(1) True sage: P.fix(); P.is_fixed() True """ cdef int j else:
def unfix(self, i=None): """ Set that this GaussianMixtureDistribution (or its ith component) is not fixed when using Baum-Welch to update the corresponding HMM.
INPUT:
- i -- None (default) or integer; if given, only fix the i-th component
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.fix(1); P.is_fixed(1) True sage: P.unfix(1); P.is_fixed(1) False sage: P.fix(); P.is_fixed() True sage: P.unfix(); P.is_fixed() False
""" cdef int j else:
def __repr__(self): """ Return string representation of this mixed Gaussian distribution.
EXAMPLES::
sage: hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]).__repr__() '0.2*N(-10.0,0.5) + 0.6*N(1.0,1.0) + 0.2*N(20.0,0.5)' """
def sample(self, n=None): """ Return a single sample from this distribution (by default), or if n>1, return a TimeSeries of samples.
INPUT:
- n -- integer or None (default: None)
OUTPUT:
- float if n is None (default); otherwise a TimeSeries
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.sample() 19.65824361087513 sage: P.sample(1) [-10.4683] sage: P.sample(5) [-0.1688, -10.3479, 1.6812, 20.1083, -9.9801] sage: P.sample(0) [] sage: P.sample(-3) Traceback (most recent call last): ... ValueError: n must be nonnegative """ cdef Py_ssize_t i cdef TimeSeries T else:
cdef double _sample(self, randstate rstate): """ Used internally to compute a sample from this distribution quickly.
INPUT:
- rstate -- a randstate object
OUTPUT:
- double """ cdef double accum, r cdef int n
# See the remark in hmm.pyx about using GSL to remove this # silly way of sampling from a discrete distribution. raise RuntimeError("invalid probability distribution")
cpdef double prob(self, double x): """ Return the probability of x.
Since this is a continuous distribution, this is defined to be the limit of the p's such that the probability of [x,x+h] is p*h.
INPUT:
- x -- float
OUTPUT:
- float
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.prob(.5) 0.21123919605857971 sage: P.prob(-100) 0.0 sage: P.prob(20) 0.1595769121605731 """ # The tricky-looking code below is a fast version of this: # return sum([c/(sqrt(2*math.pi)*std) * \ # exp(-(x-mean)*(x-mean)/(2*std*std)) for # c, mean, std in self.B]) cdef int n
cpdef double prob_m(self, double x, int m): """ Return the probability of x using just the m-th summand.
INPUT:
- x -- float - m -- integer
OUTPUT:
- float
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: P.prob_m(.5, 0) 2.7608117680508...e-97 sage: P.prob_m(.5, 1) 0.21123919605857971 sage: P.prob_m(.5, 2) 0.0 """ cdef double s, mu raise IndexError("index out of range")
TimeSeries param, IntList fixed): """ Used in unpickling GaussianMixtureDistribution's.
EXAMPLES::
sage: P = hmm.GaussianMixtureDistribution([(.2,-10,.5),(.6,1,1),(.2,20,.5)]) sage: loads(dumps(P)) == P # indirect doctest True """ |