wrote lexer... again. doesn't properly handle strings though

Brian Picciano 2016-07-05 20:03:04 -06:00
parent 4f9baf7514
commit b8ef198384
3 changed files with 234 additions and 0 deletions

lex.go · Normal file · 136 lines

@@ -0,0 +1,136 @@
package ginger

import (
	"io"
	"strings"

	"github.com/mediocregopher/lexgo"
)

// token types emitted by this lexer, starting after lexgo's built-in types
const (
	number lexgo.TokenType = lexgo.UserDefined + iota
	identifier
	punctuation
)

var numberSet = "0123456789"
var whitespaceSet = " \n\r\t\v\f"
var punctuationSet = ",{}()<>|"

func newLexer(r io.Reader) *lexgo.Lexer {
	return lexgo.NewLexer(r, lexWhitespace)
}

// lexWhitespace is the top-level state: it discards whitespace and comments,
// then buffers the first rune of the next token and dispatches on it
func lexWhitespace(lexer *lexgo.Lexer) lexgo.LexerFunc {
	r, err := lexer.ReadRune()
	if err != nil {
		return nil
	}

	if strings.ContainsRune(whitespaceSet, r) {
		return lexWhitespace
	}

	if r == '/' {
		n, err := lexer.PeekRune()
		if err != nil {
			return nil
		}

		var lexComment func(*lexgo.Lexer) bool
		if n == '/' {
			lexComment = lexLineComment
		} else if n == '*' {
			lexComment = lexBlockComment
		}
		if lexComment != nil {
			if !lexComment(lexer) {
				return nil
			}
			return lexWhitespace
		}
	}

	lexer.BufferRune(r)

	switch {
	case strings.ContainsRune(punctuationSet, r):
		return lexPunctuation
	case strings.ContainsRune(numberSet, r):
		return lexNumber
	default:
		return lexIdentifier
	}
}

// assumes the punctuation has already been buffered
func lexPunctuation(lexer *lexgo.Lexer) lexgo.LexerFunc {
	lexer.Emit(punctuation)
	return lexWhitespace
}

// lexGeneralExpr buffers runes until it hits whitespace or punctuation, then
// emits everything buffered as a single token of the given type
func lexGeneralExpr(lexer *lexgo.Lexer, typ lexgo.TokenType) lexgo.LexerFunc {
	for {
		r, err := lexer.ReadRune()
		if err != nil {
			return nil
		}

		if strings.ContainsRune(whitespaceSet, r) {
			lexer.Emit(typ)
			return lexWhitespace
		}

		if strings.ContainsRune(punctuationSet, r) {
			lexer.Emit(typ)
			lexer.BufferRune(r)
			return lexPunctuation
		}

		lexer.BufferRune(r)
	}
}

func lexNumber(lexer *lexgo.Lexer) lexgo.LexerFunc {
	return lexGeneralExpr(lexer, number)
}

func lexIdentifier(lexer *lexgo.Lexer) lexgo.LexerFunc {
	return lexGeneralExpr(lexer, identifier)
}

// lexLineComment discards runes through the end of the line, returning false
// if a read error occurs first
func lexLineComment(lexer *lexgo.Lexer) bool {
	for {
		r, err := lexer.ReadRune()
		if err != nil {
			return false
		} else if r == '\n' {
			return true
		}
	}
}

// lexBlockComment discards a /* ... */ comment, recursing to handle nested
// block comments, and returns false if a read error occurs first
func lexBlockComment(lexer *lexgo.Lexer) bool {
	for {
		r, err := lexer.ReadRune()
		if err != nil {
			return false
		}

		if r == '*' || r == '/' {
			n, err := lexer.PeekRune()
			if err != nil {
				return false
			}

			if r == '*' && n == '/' {
				_, err = lexer.ReadRune()
				return err == nil
			}
			if r == '/' && n == '*' {
				if !lexBlockComment(lexer) {
					return false
				}
			}
		}
	}
}
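
The commit message notes that strings aren't handled yet. As a rough, hypothetical sketch (not part of this commit), a string state could slot into the same state machine using only the lexgo calls already used above; the str token type, the dispatch from lexWhitespace, and the escape handling are all assumptions:

// str is a hypothetical token type for string literals; it would be declared
// alongside number, identifier, and punctuation in the const block above
const str = punctuation + 1

// lexString is one possible shape for a string state. It assumes lexWhitespace
// gains a case that dispatches here after buffering an opening '"', the same
// way it dispatches to lexPunctuation.
func lexString(lexer *lexgo.Lexer) lexgo.LexerFunc {
	for {
		r, err := lexer.ReadRune()
		if err != nil {
			return nil
		}
		lexer.BufferRune(r)

		if r == '"' {
			// closing quote: emit everything buffered, including both quotes
			lexer.Emit(str)
			return lexWhitespace
		}
		if r == '\\' {
			// buffer the escaped rune verbatim rather than interpreting it
			if r, err = lexer.ReadRune(); err != nil {
				return nil
			}
			lexer.BufferRune(r)
		}
	}
}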

lex_test.go · Normal file · 71 lines

@@ -0,0 +1,71 @@
package ginger

import (
	"bytes"
	"io"
	. "testing"

	"github.com/mediocregopher/lexgo"
	"github.com/stretchr/testify/assert"
)

var lexTestSrc = `
// this is a comment
// // this is also a comment
a
anIdentifier
1
100
1.5
1.5e9
/* block comment */
prefix /*
Another block comment
/* Embedded */
/*
Super embedded
*/
*/ suffix
// this one is kind of fun, technically it's a comment
/*/
(punctuation,is{cool}<> )
-tab
`

func TestLex(t *T) {
	l := newLexer(bytes.NewBufferString(lexTestSrc))

	assertNext := func(typ lexgo.TokenType, val string) {
		t.Logf("asserting %q", val)
		tok := l.Next()
		assert.Equal(t, typ, tok.TokenType)
		assert.Equal(t, val, tok.Val)
	}

	assertNext(identifier, "a")
	assertNext(identifier, "anIdentifier")
	assertNext(number, "1")
	assertNext(number, "100")
	assertNext(number, "1.5")
	assertNext(number, "1.5e9")
	assertNext(identifier, "prefix")
	assertNext(identifier, "suffix")
	assertNext(punctuation, "(")
	assertNext(identifier, "punctuation")
	assertNext(punctuation, ",")
	assertNext(identifier, "is")
	assertNext(punctuation, "{")
	assertNext(identifier, "cool")
	assertNext(punctuation, "}")
	assertNext(punctuation, "<")
	assertNext(punctuation, ">")
	assertNext(punctuation, ")")
	assertNext(identifier, "-tab")

	// once the input is exhausted the lexer should return an error token wrapping io.EOF
	tok := l.Next()
	assert.Equal(t, lexgo.Err, tok.TokenType)
	assert.Equal(t, io.EOF, tok.Err)
}
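
For illustration only (again, not part of the commit), the lexer could be driven outside a test using the same Token fields the test exercises; dumpTokens and its use of fmt are assumptions, not existing code:

// dumpTokens drains a lexer built by newLexer and prints each token, stopping
// when the lexer returns an error token (io.EOF once input is exhausted, as
// asserted in TestLex above)
func dumpTokens(r io.Reader) error {
	l := newLexer(r)
	for {
		tok := l.Next()
		if tok.TokenType == lexgo.Err {
			if tok.Err == io.EOF {
				return nil
			}
			return tok.Err
		}
		fmt.Printf("%d %q\n", tok.TokenType, tok.Val)
	}
}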

types.go · Normal file · 27 lines

@@ -0,0 +1,27 @@
package ginger

// Expr is a single parsed expression; the comment on each field describes the
// source syntax that field corresponds to
type Expr struct {
	// [0-9]+
	Int int

	// true | false
	Bool bool

	// [Expr [, Expr]]
	Tuple []Expr

	// { [Statement (;\s)]* }
	Block []Expr

	// [Expr | Expr]
	Pipeline []Expr

	// [a-z]+
	Identifier string

	// Expr > Expr
	Statement *struct {
		Input Expr
		Into  Expr
	}
}
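
As a rough illustration of how these fields compose (the identifier name is made up, and this value is not part of the commit), the source form (1, 2) | sum would be represented roughly as:

// example shows a pipeline whose first stage is a two-element tuple and whose
// second stage is a hypothetical identifier
var example = Expr{
	Pipeline: []Expr{
		{Tuple: []Expr{{Int: 1}, {Int: 2}}},
		{Identifier: "sum"},
	},
}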