Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

""" 

Hidden Markov Models -- Utility functions 

  

AUTHOR: 

  

- William Stein, 2010-03 

""" 

  

############################################################################# 

# Copyright (C) 2010 William Stein <wstein@gmail.com> 

# Distributed under the terms of the GNU General Public License (GPL) v2+. 

# The full text of the GPL is available at: 

# http://www.gnu.org/licenses/ 

############################################################################# 

  

  

from sage.structure.element import is_Matrix 

from sage.misc.flatten import flatten 

  

cdef class HMM_Util: 

""" 

A class used in order to share cdef's methods between different files. 

""" 

cpdef normalize_probability_TimeSeries(self, TimeSeries T, Py_ssize_t i, Py_ssize_t j): 

""" 

This function is used internally by the Hidden Markov Models code. 

  

Replace entries of T[i:j] in place so that they are all 

nonnegative and sum to 1. Negative entries are replaced by 0 and 

T[i:j] is then rescaled to ensure that the sum of the entries in 

each row is equal to 1. If all entries are 0, replace them 

by 1/(j-i). 

  

INPUT: 

  

- T -- a TimeSeries 

- i -- nonnegative integer 

- j -- nonnegative integer 

  

OUTPUT: 

  

- T is modified 

  

EXAMPLES:: 

  

sage: import sage.stats.hmm.util 

sage: T = stats.TimeSeries([.1, .3, .7, .5]) 

sage: u = sage.stats.hmm.util.HMM_Util() 

sage: u.normalize_probability_TimeSeries(T,0,3) 

sage: T 

[0.0909, 0.2727, 0.6364, 0.5000] 

sage: u.normalize_probability_TimeSeries(T,0,4) 

sage: T 

[0.0606, 0.1818, 0.4242, 0.3333] 

sage: abs(T.sum()-1) < 1e-8 # might not exactly equal 1 due to rounding 

True 

""" 

# One single bounds check only 

if i < 0 or j < 0 or i > T._length or j > T._length: 

raise IndexError 

  

if j-i <= 0: 

# Nothing to do 

return 

  

cdef Py_ssize_t k 

  

# Replace negative entries by 0, summing entries 

cdef double s = 0, t 

for k in range(i,j): 

if T._values[k] < 0: 

T._values[k] = 0 

else: 

s += T._values[k] 

  

if s == 0: 

# If sum is 0, make all entries 1/(j-i). 

t = 1.0/(j-i) 

for k in range(i,j): 

T._values[k] = t 

else: 

# Normalise so sum is 1. 

for k in range(i, j): 

T._values[k] /= s 

  

  

  

cpdef TimeSeries initial_probs_to_TimeSeries(self, pi, bint normalize): 

""" 

This function is used internally by the __init__ methods of 

various Hidden Markov Models. 

  

INPUT: 

  

- pi -- vector, list, or TimeSeries 

- normalize -- if True, replace negative entries by 0 and 

rescale to ensure that the sum of the entries in each row is 

equal to 1. If the sum of the entries in a row is 0, replace them 

all by 1/N. 

  

OUTPUT: 

- a TimeSeries of length N 

  

EXAMPLES:: 

  

sage: import sage.stats.hmm.util 

sage: u = sage.stats.hmm.util.HMM_Util() 

sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], True) 

[0.0833, 0.1667, 0.7500] 

sage: u.initial_probs_to_TimeSeries([0.1,0.2,0.9], False) 

[0.1000, 0.2000, 0.9000] 

""" 

cdef TimeSeries T 

if isinstance(pi, TimeSeries): 

T = pi 

else: 

if not isinstance(pi, list): 

pi = list(pi) 

T = TimeSeries(pi) 

if normalize: 

# Now normalize 

self.normalize_probability_TimeSeries(T, 0, T._length) 

return T 

  

  

cpdef TimeSeries state_matrix_to_TimeSeries(self, A, int N, bint normalize): 

""" 

This function is used internally by the __init__ methods of 

Hidden Markov Models to make a transition matrix from A. 

  

  

INPUT: 

  

- A -- matrix, list, list of lists, or TimeSeries 

- N -- number of states 

- normalize -- if True, replace negative entries by 0 and 

rescale to ensure that the sum of the entries in each row is 

equal to 1. If the sum of the entries in a row is 0, replace them 

all by 1/N. 

  

OUTPUT: 

  

- a TimeSeries 

  

EXAMPLES:: 

  

sage: import sage.stats.hmm.util 

sage: u = sage.stats.hmm.util.HMM_Util() 

sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, True) 

[0.1250, 0.8750, 0.4286, 0.5714] 

sage: u.state_matrix_to_TimeSeries([[.1,.7],[3/7,4/7]], 2, False) 

[0.1000, 0.7000, 0.4286, 0.5714] 

""" 

cdef TimeSeries T 

if isinstance(A, TimeSeries): 

T = A 

elif is_Matrix(A): 

T = TimeSeries(A.list()) 

elif isinstance(A, list): 

T = TimeSeries(flatten(A)) 

else: 

T = TimeSeries(A) 

cdef Py_ssize_t i 

if normalize: 

# Set to 0 negative rows and make sure sum of entries in each 

# row is 1. 

if len(T) != N*N: 

raise ValueError("number of entries of transition matrix A must be the square of the number of entries of pi") 

for i in range(N): 

self.normalize_probability_TimeSeries(T, i*N, (i+1)*N) 

return T