@@ -26,18 +26,23 @@ const eof = 0
26
26
// Signal eof with Error
27
27
const eofError = - 1
28
28
29
+ // Standard Python definition of a tab: a tab advances the indent to the next multiple of tabSize columns
30
+ const tabSize = 8 // must stay a power of two: countIndent masks the indent with tabSize - 1
31
+
29
32
// The parser uses the type <prefix>Lex as a lexer. It must provide
30
33
// the methods Lex(*<prefix>SymType) int and Error(string).
31
34
type yyLex struct {
32
35
reader * bufio.Reader // buffered input source; presumably what refill reads from - TODO confirm
33
36
line string // current line being parsed
34
37
eof bool // flag to show EOF was read
38
+ error bool // set if an error has occurred; set by Error and checked by Parse and Lex after the run
35
39
indentStack []int // indent stack to control INDENT / DEDENT tokens
36
40
state int // current state of state machine (readString, readIndent, checkIndent, ...)
37
41
currentIndent string // whitespace at start of current line
38
- indentSpace bool // whether we are indenting with spaces
39
- indentTab bool // whether we are indenting with tabs
40
42
interactive bool // set if reading interactive input
43
+ bracket int // number of open [ ]; incremented on '[' and decremented on ']' when an operator is read
44
+ parenthesis int // number of open ( ); incremented on '(' and decremented on ')' when an operator is read
45
+ brace int // number of open { }; incremented on '{' and decremented on '}' when an operator is read
41
46
}
42
47
43
48
func NewLex (r io.Reader ) * yyLex {
@@ -76,28 +81,29 @@ func (x *yyLex) countIndent(s string) int {
76
81
// mixes tabs and spaces in a way that makes the meaning
77
82
// dependent on the worth of a tab in spaces; a TabError is
78
83
// raised in that case
79
- if ! x .indentSpace && ! x .indentTab {
80
- switch s [0 ] {
84
+ indent := 0
85
+ for _ , c := range s {
86
+ switch c {
81
87
case ' ' :
82
- x . indentSpace = true
88
+ indent ++
83
89
case '\t' :
84
- x .indentTab = true
90
+ // 012345678901234567
91
+ // a b
92
+ // a b
93
+ // a b
94
+ // a b
95
+ // a b
96
+ // a b
97
+ // a b
98
+ // ab
99
+ // a b
100
+ indent += tabSize - (indent & (tabSize - 1 ))
85
101
default :
86
102
panic ("bad indent" )
87
103
}
104
+
88
105
}
89
- if x .indentSpace {
90
- if strings .ContainsRune (s , '\t' ) {
91
- x .Error ("Inconsistent indent" )
92
- }
93
- } else if x .indentTab {
94
- if strings .ContainsRune (s , ' ' ) {
95
- x .Error ("Inconsistent indent" )
96
- }
97
- } else {
98
- panic ("indent not set" )
99
- }
100
- return len (s )
106
+ return indent
101
107
}
102
108
103
109
var operators = map [string ]int {
@@ -213,6 +219,11 @@ func init() {
213
219
tokenToString [NUMBER ] = "NUMBER"
214
220
}
215
221
222
+ // openBrackets reports true if any of the bracket, parenthesis or brace counters is non-zero, i.e. there are any open brackets
223
+ func (x * yyLex ) openBrackets () bool {
224
+ return x .bracket != 0 || x .parenthesis != 0 || x .brace != 0 // NOTE(review): the counters are decremented without a floor, so an unmatched closer leaves one negative and this still reports true - confirm whether > 0 was intended
225
+ }
226
+
216
227
// States
217
228
const (
218
229
readString = iota
@@ -243,11 +254,15 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
243
254
case readString :
244
255
// Read x.line
245
256
x .refill ()
257
+ x .state ++
246
258
// an empty line while reading interactive input should return a NEWLINE
247
259
if x .interactive && (x .line == "" || x .line == "\n " ) {
260
+ // Don't output NEWLINE if brackets are open
261
+ if x .openBrackets () {
262
+ continue
263
+ }
248
264
return NEWLINE
249
265
}
250
- x .state ++
251
266
case readIndent :
252
267
// Read the initial indent and get rid of it
253
268
trimmed := strings .TrimLeft (x .line , " \t " )
@@ -262,6 +277,11 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
262
277
}
263
278
x .state ++
264
279
case checkIndent :
280
+ // Don't output INDENT or DEDENT if brackets are open
281
+ if x .openBrackets () {
282
+ x .state ++
283
+ continue
284
+ }
265
285
// See if indent has changed and issue INDENT / DEDENT
266
286
indent := x .countIndent (x .currentIndent )
267
287
indentStackTop := x .indentStack [len (x .indentStack )- 1 ]
@@ -293,9 +313,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
293
313
continue
294
314
}
295
315
296
- // Check if newline reached
297
- if x .line [0 ] == '\n' {
316
+ // Check if newline or comment reached
317
+ if x .line [0 ] == '\n' || x . line [ 0 ] == '#' {
298
318
x .state = checkEof
319
+ // Don't output NEWLINE if brackets are open
320
+ if x .openBrackets () {
321
+ continue
322
+ }
299
323
return NEWLINE
300
324
}
301
325
@@ -329,6 +353,21 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329
353
// Read an operator if available
330
354
token = x .readOperator ()
331
355
if token != eof {
356
+ // implement implicit line joining rules
357
+ switch token {
358
+ case '[' :
359
+ x .bracket ++
360
+ case ']' :
361
+ x .bracket --
362
+ case '(' :
363
+ x .parenthesis ++
364
+ case ')' :
365
+ x .parenthesis --
366
+ case '{' :
367
+ x .brace ++
368
+ case '}' :
369
+ x .brace --
370
+ }
332
371
return token
333
372
}
334
373
@@ -645,6 +684,7 @@ foundEndOfString:
645
684
646
685
// The parser calls this method on a parse error.
647
686
func (x * yyLex ) Error (s string ) {
687
+ x .error = true
648
688
log .Printf ("Parse error: %s" , s )
649
689
log .Printf ("Parse buffer %q" , x .line )
650
690
log .Printf ("State %#v" , x )
@@ -656,12 +696,17 @@ func SetDebug(level int) {
656
696
}
657
697
658
698
// Parse a file: runs yyParse over a lexer built from in; the new version returns a py.SyntaxError exception if the lexer's error flag was set, or nil otherwise
659
- func Parse (in io.Reader ) {
660
- yyParse (NewLex (in ))
699
+ func Parse (in io.Reader ) error {
700
+ lex := NewLex (in )
701
+ yyParse (lex ) // the return value of yyParse is not inspected; failure is detected through lex.error below
702
+ if lex .error { // the flag is set by (*yyLex).Error, which the parser calls on a parse error
703
+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
704
+ }
705
+ return nil
661
706
}
662
707
663
708
// Lex a file only
664
- func Lex (in io.Reader ) {
709
+ func Lex (in io.Reader ) error {
665
710
lex := NewLex (in )
666
711
yylval := yySymType {}
667
712
for {
@@ -670,4 +715,8 @@ func Lex(in io.Reader) {
670
715
break
671
716
}
672
717
}
718
+ if lex .error {
719
+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
720
+ }
721
+ return nil
673
722
}
0 commit comments