Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 

# Licensed to PSF under a Contributor Agreement. 

 

"""Parser engine for the grammar tables generated by pgen. 

 

The grammar table must be loaded first. 

 

See Parser/parser.c in the Python distribution for additional info on 

how this parsing engine works. 

 

""" 

 

# Local imports 

from sphinx.pycode.pgen2 import token 

 

if False: 

# For type annotation 

from typing import Any, List, Set, Tuple # NOQA 

 

class ParseError(Exception):
    """Exception to signal the parser is stuck.

    Raised by Parser.addtoken()/classify() when no transition exists for
    the current token.  The offending token's type, value, and context
    (typically a (lineno, offset) pair) are kept as attributes so callers
    can produce precise diagnostics.
    """

    def __init__(self, msg, type, value, context):
        # Bake the full diagnostic into the exception message up front
        # so that str(exc) is immediately useful in tracebacks/logs.
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg          # short reason, e.g. "bad input"
        self.type = type        # token type number that could not be shifted
        self.value = value      # token string value
        self.context = context  # opaque error context, usually (lineno, offset)

 

class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # Default converter is the identity function, so with no convert
        # argument the parser yields the raw concrete syntax tree.
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])  # type: Tuple[unicode, unicode, unicode, List]
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None  # type: Any
        self.used_names = set()  # type: Set[unicode]
        # Aliased to self.rootnode.used_names in pop()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    # Labels < 256 are terminals (token numbers).
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break  # To continue the outer while loop
            else:
                # No arc matched the token in the current state.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label.  (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)  # type: Tuple[unicode, unicode, unicode, List]
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            # A converter may return None to drop the node entirely.
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])  # type: Tuple[unicode, unicode, unicode, List]
        # Advance the current entry past the nonterminal, then descend
        # into the nonterminal's own DFA at its initial state.
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal.  (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                # Attach the finished nonterminal to its parent's children.
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                # The start symbol was popped: parsing is complete.
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names