Skip to content

Commit 4d27ead

Browse files
committed
Implement implicit line joining
* Redo the indentation counting code - better, but still not 100% correct * Detect errors and return an error from Parse
1 parent acd7411 commit 4d27ead

File tree

2 files changed

+78
-26
lines changed

2 files changed

+78
-26
lines changed

parser/lexer.go

Lines changed: 73 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,23 @@ const eof = 0
2626
// Signal eof with Error
2727
const eofError = -1
2828

29+
// Standard Python tab size: a tab advances the indent to the next multiple of 8
30+
const tabSize = 8
31+
2932
// The parser uses the type <prefix>Lex as a lexer. It must provide
3033
// the methods Lex(*<prefix>SymType) int and Error(string).
3134
type yyLex struct {
3235
reader *bufio.Reader
3336
line string // current line being parsed
3437
eof bool // flag to show EOF was read
38+
error bool // set if an error has occurred
3539
indentStack []int // indent stack to control INDENT / DEDENT tokens
3640
state int // current state of state machine
3741
currentIndent string // whitespace at start of current line
38-
indentSpace bool // whether we are indenting with spaces
39-
indentTab bool // whether we are indenting with tabs
4042
interactive bool // set if reading interactive input
43+
bracket int // number of open [ ]
44+
parenthesis int // number of open ( )
45+
brace int // number of open { }
4146
}
4247

4348
func NewLex(r io.Reader) *yyLex {
@@ -76,28 +81,29 @@ func (x *yyLex) countIndent(s string) int {
7681
// mixes tabs and spaces in a way that makes the meaning
7782
// dependent on the worth of a tab in spaces; a TabError is
7883
// raised in that case
79-
if !x.indentSpace && !x.indentTab {
80-
switch s[0] {
84+
indent := 0
85+
for _, c := range s {
86+
switch c {
8187
case ' ':
82-
x.indentSpace = true
88+
indent++
8389
case '\t':
84-
x.indentTab = true
90+
// 012345678901234567
91+
// a       b
92+
//  a      b
93+
//   a     b
94+
//    a    b
95+
//     a   b
96+
//      a  b
97+
//       a b
98+
//        ab
99+
//         a       b
100+
indent += tabSize - (indent & (tabSize - 1))
85101
default:
86102
panic("bad indent")
87103
}
104+
88105
}
89-
if x.indentSpace {
90-
if strings.ContainsRune(s, '\t') {
91-
x.Error("Inconsistent indent")
92-
}
93-
} else if x.indentTab {
94-
if strings.ContainsRune(s, ' ') {
95-
x.Error("Inconsistent indent")
96-
}
97-
} else {
98-
panic("indent not set")
99-
}
100-
return len(s)
106+
return indent
101107
}
102108

103109
var operators = map[string]int{
@@ -213,6 +219,11 @@ func init() {
213219
tokenToString[NUMBER] = "NUMBER"
214220
}
215221

222+
// openBrackets reports whether any [ ], ( ) or { } counter is non-zero
223+
func (x *yyLex) openBrackets() bool {
224+
return x.bracket != 0 || x.parenthesis != 0 || x.brace != 0
225+
}
226+
216227
// States
217228
const (
218229
readString = iota
@@ -243,11 +254,15 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
243254
case readString:
244255
// Read x.line
245256
x.refill()
257+
x.state++
246258
// an empty line while reading interactive input should return a NEWLINE
247259
if x.interactive && (x.line == "" || x.line == "\n") {
260+
// Don't output NEWLINE if brackets are open
261+
if x.openBrackets() {
262+
continue
263+
}
248264
return NEWLINE
249265
}
250-
x.state++
251266
case readIndent:
252267
// Read the initial indent and get rid of it
253268
trimmed := strings.TrimLeft(x.line, " \t")
@@ -262,6 +277,11 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
262277
}
263278
x.state++
264279
case checkIndent:
280+
// Don't output INDENT or DEDENT if brackets are open
281+
if x.openBrackets() {
282+
x.state++
283+
continue
284+
}
265285
// See if indent has changed and issue INDENT / DEDENT
266286
indent := x.countIndent(x.currentIndent)
267287
indentStackTop := x.indentStack[len(x.indentStack)-1]
@@ -293,9 +313,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
293313
continue
294314
}
295315

296-
// Check if newline reached
297-
if x.line[0] == '\n' {
316+
// Check if newline or comment reached
317+
if x.line[0] == '\n' || x.line[0] == '#' {
298318
x.state = checkEof
319+
// Don't output NEWLINE if brackets are open
320+
if x.openBrackets() {
321+
continue
322+
}
299323
return NEWLINE
300324
}
301325

@@ -329,6 +353,21 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329353
// Read an operator if available
330354
token = x.readOperator()
331355
if token != eof {
356+
// implement implicit line joining rules
357+
switch token {
358+
case '[':
359+
x.bracket++
360+
case ']':
361+
x.bracket--
362+
case '(':
363+
x.parenthesis++
364+
case ')':
365+
x.parenthesis--
366+
case '{':
367+
x.brace++
368+
case '}':
369+
x.brace--
370+
}
332371
return token
333372
}
334373

@@ -645,6 +684,7 @@ foundEndOfString:
645684

646685
// The parser calls this method on a parse error.
647686
func (x *yyLex) Error(s string) {
687+
x.error = true
648688
log.Printf("Parse error: %s", s)
649689
log.Printf("Parse buffer %q", x.line)
650690
log.Printf("State %#v", x)
@@ -656,12 +696,17 @@ func SetDebug(level int) {
656696
}
657697

658698
// Parse parses the source read from in, returning a SyntaxError if an error was reported
659-
func Parse(in io.Reader) {
660-
yyParse(NewLex(in))
699+
func Parse(in io.Reader) error {
700+
lex := NewLex(in)
701+
yyParse(lex)
702+
if lex.error {
703+
return py.ExceptionNewf(py.SyntaxError, "Syntax Error")
704+
}
705+
return nil
661706
}
662707

663708
// Lex only lexes the source read from in, returning a SyntaxError if an error was reported
664-
func Lex(in io.Reader) {
709+
func Lex(in io.Reader) error {
665710
lex := NewLex(in)
666711
yylval := yySymType{}
667712
for {
@@ -670,4 +715,8 @@ func Lex(in io.Reader) {
670715
break
671716
}
672717
}
718+
if lex.error {
719+
return py.ExceptionNewf(py.SyntaxError, "Syntax Error")
720+
}
721+
return nil
673722
}

parser/testparser/testparser.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ func main() {
2929
}
3030
fmt.Printf("-----------------\n")
3131
if *lex {
32-
parser.Lex(in)
32+
err = parser.Lex(in)
3333
} else {
34-
parser.Parse(in)
34+
err = parser.Parse(in)
3535
}
3636
fmt.Printf("-----------------\n")
3737
in.Close()
38+
if err != nil {
39+
log.Fatalf("Failed on %q: %v", path, err)
40+
}
3841
}
3942
}

0 commit comments

Comments
 (0)