From b8ef1983848520f0d83150fb7e81c58f2620c446 Mon Sep 17 00:00:00 2001
From: Brian Picciano
Date: Tue, 5 Jul 2016 20:03:04 -0600
Subject: [PATCH] wrote lexer... again. doesn't properly handle strings though

---
 lex.go      | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lex_test.go |  71 +++++++++++++++++++++++++++
 types.go    |  27 ++++++++++
 3 files changed, 234 insertions(+)
 create mode 100644 lex.go
 create mode 100644 lex_test.go
 create mode 100644 types.go

diff --git a/lex.go b/lex.go
new file mode 100644
index 0000000..9d04007
--- /dev/null
+++ b/lex.go
@@ -0,0 +1,136 @@
+package ginger
+
+import (
+	"io"
+	"strings"
+
+	"github.com/mediocregopher/lexgo"
+)
+
+const (
+	number lexgo.TokenType = lexgo.UserDefined + iota
+	identifier
+	punctuation
+)
+
+var numberSet = "0123456789"
+var whitespaceSet = " \n\r\t\v\f"
+var punctuationSet = ",{}()<>|"
+
+func newLexer(r io.Reader) *lexgo.Lexer {
+	return lexgo.NewLexer(r, lexWhitespace)
+}
+
+func lexWhitespace(lexer *lexgo.Lexer) lexgo.LexerFunc {
+	r, err := lexer.ReadRune()
+	if err != nil {
+		return nil
+	}
+
+	if strings.ContainsRune(whitespaceSet, r) {
+		return lexWhitespace
+	}
+
+	if r == '/' {
+		n, err := lexer.PeekRune()
+		if err != nil {
+			return nil
+		}
+
+		var lexComment func(*lexgo.Lexer) bool
+		if n == '/' {
+			lexComment = lexLineComment
+		} else if n == '*' {
+			lexComment = lexBlockComment
+		}
+		if lexComment != nil {
+			if !lexComment(lexer) {
+				return nil
+			}
+			return lexWhitespace
+		}
+	}
+
+	lexer.BufferRune(r)
+
+	switch {
+	case strings.ContainsRune(punctuationSet, r):
+		return lexPunctuation
+	case strings.ContainsRune(numberSet, r):
+		return lexNumber
+	default:
+		return lexIdentifier
+	}
+}
+
+// assumes the punctuation has already been buffered
+func lexPunctuation(lexer *lexgo.Lexer) lexgo.LexerFunc {
+	lexer.Emit(punctuation)
+	return lexWhitespace
+}
+
+func lexGeneralExpr(lexer *lexgo.Lexer, typ lexgo.TokenType) lexgo.LexerFunc {
+	for {
+		r, err := lexer.ReadRune()
+		if err != nil {
+			return nil
+		}
+
+		if strings.ContainsRune(whitespaceSet, r) {
+			lexer.Emit(typ)
+			return lexWhitespace
+		}
+
+		if strings.ContainsRune(punctuationSet, r) {
+			lexer.Emit(typ)
+			lexer.BufferRune(r)
+			return lexPunctuation
+		}
+
+		lexer.BufferRune(r)
+	}
+}
+
+func lexNumber(lexer *lexgo.Lexer) lexgo.LexerFunc {
+	return lexGeneralExpr(lexer, number)
+}
+
+func lexIdentifier(lexer *lexgo.Lexer) lexgo.LexerFunc {
+	return lexGeneralExpr(lexer, identifier)
+}
+
+func lexLineComment(lexer *lexgo.Lexer) bool {
+	for {
+		r, err := lexer.ReadRune()
+		if err != nil {
+			return false
+		} else if r == '\n' {
+			return true
+		}
+	}
+}
+
+func lexBlockComment(lexer *lexgo.Lexer) bool {
+	for {
+		r, err := lexer.ReadRune()
+		if err != nil {
+			return false
+		}
+
+		if r == '*' || r == '/' {
+			n, err := lexer.PeekRune()
+			if err != nil {
+				return false
+			}
+			if r == '*' && n == '/' {
+				_, err = lexer.ReadRune()
+				return err == nil
+			}
+			if r == '/' && n == '*' {
+				if !lexBlockComment(lexer) {
+					return false
+				}
+			}
+		}
+	}
+}
diff --git a/lex_test.go b/lex_test.go
new file mode 100644
index 0000000..b6447a9
--- /dev/null
+++ b/lex_test.go
@@ -0,0 +1,71 @@
+package ginger
+
+import (
+	"bytes"
+	"io"
+	. "testing"
+
+	"github.com/mediocregopher/lexgo"
+	"github.com/stretchr/testify/assert"
+)
+
+var lexTestSrc = `
+	// this is a comment
+	// // this is also a comment
+	a
+	anIdentifier
+	1
+	100
+	1.5
+	1.5e9
+
+	/* block comment */
+	prefix /*
+	Another block comment
+	/* Embedded */
+	/*
+	Super embedded
+	*/
+	*/ suffix
+
+	// this one is kind of fun, technically it's a comment
+	/*/
+
+	(punctuation,is{cool}<> )
+	-tab
+`
+
+func TestLex(t *T) {
+	l := newLexer(bytes.NewBufferString(lexTestSrc))
+
+	assertNext := func(typ lexgo.TokenType, val string) {
+		t.Logf("asserting %q", val)
+		tok := l.Next()
+		assert.Equal(t, typ, tok.TokenType)
+		assert.Equal(t, val, tok.Val)
+	}
+
+	assertNext(identifier, "a")
+	assertNext(identifier, "anIdentifier")
+	assertNext(number, "1")
+	assertNext(number, "100")
+	assertNext(number, "1.5")
+	assertNext(number, "1.5e9")
+	assertNext(identifier, "prefix")
+	assertNext(identifier, "suffix")
+	assertNext(punctuation, "(")
+	assertNext(identifier, "punctuation")
+	assertNext(punctuation, ",")
+	assertNext(identifier, "is")
+	assertNext(punctuation, "{")
+	assertNext(identifier, "cool")
+	assertNext(punctuation, "}")
+	assertNext(punctuation, "<")
+	assertNext(punctuation, ">")
+	assertNext(punctuation, ")")
+	assertNext(identifier, "-tab")
+
+	tok := l.Next()
+	assert.Equal(t, tok.TokenType, lexgo.Err)
+	assert.Equal(t, tok.Err, io.EOF)
+}
diff --git a/types.go b/types.go
new file mode 100644
index 0000000..8f42c88
--- /dev/null
+++ b/types.go
@@ -0,0 +1,27 @@
+package ginger
+
+type Expr struct {
+	// [0-9]+
+	Int int
+
+	// true | false
+	Bool bool
+
+	// [Expr [, Expr]]
+	Tuple []Expr
+
+	// { [Statement (;\s)]* }
+	Block []Expr
+
+	// [Expr | Expr]
+	Pipeline []Expr
+
+	// [a-z]+
+	Identifier string
+
+	// Expr > Expr
+	Statement *struct {
+		Input Expr
+		Into  Expr
+	}
+}