From f2986c7a7910b9e3bae265f93dcf2984b07a6059 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Sat, 23 Jul 2016 10:34:14 -0600 Subject: [PATCH] implement block comments in the lexer --- lexer/lexer.go | 36 +++++++++++++++++++++++++++++++- lexer/lexer_test.go | 50 +++++++++++++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 2fc3355..3886ffa 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "log" "strings" ) @@ -189,6 +190,9 @@ func (l *Lexer) Next() Token { return t } +//////////////////////////////////////////////////////////////////////////////// +// the actual fsm + var whitespaceSet = " \n\r\t\v\f" var punctuationSet = ",{}()<>|" var identifierSepSet = whitespaceSet + punctuationSet @@ -203,8 +207,10 @@ func lex(l *Lexer) lexerFn { // handle comments first, cause we have to peek for those. We ignore errors, // and assume that any error that would happen here will happen again the // next read - if n, _ := l.peekRune(); n == '/' { + if n, _ := l.peekRune(); r == '/' && n == '/' { return lexLineComment + } else if r == '/' && n == '*' { + return lexBlockComment } return lexSingleRune(l, r) @@ -257,6 +263,34 @@ func lexLineComment(l *Lexer) lexerFn { return lexLineComment } +// assumes the starting / has been read already +func lexBlockComment(l *Lexer) lexerFn { + depth := 1 + log.Printf("in block comment") + + var recurse lexerFn + recurse = func(l *Lexer) lexerFn { + r, err := l.readRune() + if err != nil { + l.emitErr(err) + return nil + } + n, _ := l.peekRune() + + if r == '/' && n == '*' { + depth++ + } else if r == '*' && n == '/' { + depth-- + } + + if depth == 0 { + return lexSkipThen(lex) + } + return recurse + } + return recurse +} + func lexStrStart(lexer *Lexer, r rune, then lexerFn) lexerFn { lexer.bufferRune(r) return then diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index b80d9aa..7fa0e00 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -18,6 +18,20 @@ var lexTestSrc = ` 1.5 1.5e9 + /* + some stuff + */ + + /* this should actually work */ + /*/ + + /* + nested! + /* + wtf this is crazy + */ + */ + (punctuation,is{cool}<> ) -tab @@ -46,24 +60,24 @@ func TestLex(t *T) { assertNext(Identifier, "100", 6, 2) assertNext(Identifier, "1.5", 7, 2) assertNext(Identifier, "1.5e9", 8, 2) - assertNext(Punctuation, "(", 10, 2) - assertNext(Identifier, "punctuation", 10, 3) - assertNext(Punctuation, ",", 10, 14) - assertNext(Identifier, "is", 10, 15) - assertNext(Punctuation, "{", 10, 17) - assertNext(Identifier, "cool", 10, 18) - assertNext(Punctuation, "}", 10, 22) - assertNext(Punctuation, "<", 10, 23) - assertNext(Punctuation, ">", 10, 24) - assertNext(Punctuation, ")", 10, 26) - assertNext(Identifier, "-tab", 11, 2) - assertNext(String, `"this is a string"`, 13, 2) - assertNext(Punctuation, ",", 13, 20) - assertNext(String, `"and so is this one"`, 13, 22) - assertNext(String, `"\"foo"`, 14, 2) - assertNext(String, `"bar\"baz\""`, 15, 2) - assertNext(String, `"buz\0"`, 16, 2) - assertNext(EOF, "EOF", 17, 0) + assertNext(Punctuation, "(", 24, 2) + assertNext(Identifier, "punctuation", 24, 3) + assertNext(Punctuation, ",", 24, 14) + assertNext(Identifier, "is", 24, 15) + assertNext(Punctuation, "{", 24, 17) + assertNext(Identifier, "cool", 24, 18) + assertNext(Punctuation, "}", 24, 22) + assertNext(Punctuation, "<", 24, 23) + assertNext(Punctuation, ">", 24, 24) + assertNext(Punctuation, ")", 24, 26) + assertNext(Identifier, "-tab", 25, 2) + assertNext(String, `"this is a string"`, 27, 2) + assertNext(Punctuation, ",", 27, 20) + assertNext(String, `"and so is this one"`, 27, 22) + assertNext(String, `"\"foo"`, 28, 2) + assertNext(String, `"bar\"baz\""`, 29, 2) + assertNext(String, `"buz\0"`, 30, 2) + assertNext(EOF, "EOF", 31, 0) assert.False(t, l.HasNext()) }