diff --git a/parse/lex/lex.go b/parse/lex/lex.go
index 828afc2..a1e95e7 100644
--- a/parse/lex/lex.go
+++ b/parse/lex/lex.go
@@ -40,6 +40,18 @@ type Token struct {
 	Val string
 }
 
+// AsError returns the token's value as an error, or nil if the token is not of
+// type Err. A nil token yields io.EOF, since that is its ostensible meaning.
+func (t *Token) AsError() error {
+	if t == nil {
+		return io.EOF
+	}
+	if t.Type != Err {
+		return nil
+	}
+	return errors.New(t.Val)
+}
+
 var (
 	errInvalidUTF8 = errors.New("invalid utf8 character")
 )
@@ -96,10 +108,9 @@ func (l *Lexer) Next() *Token {
 
 func (l *Lexer) emit(t TokenType) {
 	str := l.outbuf.String()
-	fmt.Printf("emitting %q\n", str)
 	l.ch <- &Token{
 		Type: t,
-		Val: l.outbuf.String(),
+		Val: str,
 	}
 	l.outbuf.Reset()
 }
@@ -151,11 +162,9 @@ func lexWhitespace(l *Lexer) lexerFunc {
 	}
 
 	if unicode.IsSpace(r) {
-		fmt.Printf("skipping %q because it's a space\n", r)
 		return lexWhitespace
 	}
 
-	fmt.Printf("not skipping %q\n", r)
 	l.outbuf.WriteRune(r)
 
 	switch r {
@@ -192,7 +201,6 @@ func lexQuotedString(l *Lexer) lexerFunc {
 
 	if r == '"' && buf[len(buf) - 2] != '\\' {
 		l.emit(QuotedString)
-		fmt.Println("emitting quoted string, parsing whitespace")
 		return lexWhitespace
 	}
 	return lexQuotedString
@@ -201,7 +209,6 @@ func lexQuotedString(l *Lexer) lexerFunc {
 func lexBareString(l *Lexer) lexerFunc {
 	r, err := l.peek()
 	if err != nil {
-		fmt.Printf("got err %s in peek\n", err)
 		l.emit(BareString)
 		return l.err(err)
 	}
@@ -212,7 +219,6 @@ func lexBareString(l *Lexer) lexerFunc {
 	}
 
 	if _, err = l.readRune(); err != nil {
-		fmt.Printf("got err %s in read\n", err)
 		l.emit(BareString)
 		return l.err(err)
 	}
diff --git a/parse/parse.go b/parse/parse.go
index 5956e69..3243032 100644
--- a/parse/parse.go
+++ b/parse/parse.go
@@ -1,4 +1,161 @@
 package parse
 
 import (
+	"fmt"
+	"io"
+	"strconv"
+	"unsafe"
+
+	"github.com/mediocregopher/ginger/parse/lex"
+	"github.com/mediocregopher/ginger/seq"
+	"github.com/mediocregopher/ginger/types"
 )
+
+// int_bits is the width in bits of the platform's int type. It is passed to
+// strconv.ParseInt as the bit size when checking whether a value fits an int.
+const int_bits = int(unsafe.Sizeof(int(0)) * 8)
+
+// closers maps each opening delimiter to the closing delimiter that ends it.
+var closers = map[string]string{
+	"(": ")",
+	"[": "]",
+	"{": "}",
+}
+
+// parseBareString converts a bare-string token into an element. The lexer only
+// indicates a bare string, but an integer or a float is technically a bare
+// string too, so numeric conversions are tried first, in the order int, int64,
+// float32, float64. A token that is not numeric is returned as a string, with
+// a ':' prepended unless it already starts with one.
+func parseBareString(tok *lex.Token) types.Elem {
+	// strconv signals success with a nil error, so a parsed value is only
+	// used when err == nil.
+	if i, err := strconv.ParseInt(tok.Val, 10, int_bits); err == nil {
+		return types.GoType{int(i)}
+	}
+
+	if i64, err := strconv.ParseInt(tok.Val, 10, 64); err == nil {
+		return types.GoType{int64(i64)}
+	}
+
+	if f32, err := strconv.ParseFloat(tok.Val, 32); err == nil {
+		return types.GoType{float32(f32)}
+	}
+
+	if f64, err := strconv.ParseFloat(tok.Val, 64); err == nil {
+		return types.GoType{float64(f64)}
+	}
+
+	if tok.Val[0] != ':' {
+		return types.GoType{":" + tok.Val}
+	}
+
+	return types.GoType{tok.Val}
+}
+
+// parseQuotedString unquotes the token's value with strconv.Unquote and
+// returns the result as an element, or the error from unquoting.
+func parseQuotedString(tok *lex.Token) (types.Elem, error) {
+	s, err := strconv.Unquote(tok.Val)
+	if err != nil {
+		return nil, err
+	}
+
+	return types.GoType{s}, nil
+}
+
+// Parser reads elements from the token stream of a lex.Lexer.
+type Parser struct {
+	l *lex.Lexer
+}
+
+// NewParser returns a Parser whose lexer reads from r.
+func NewParser(r io.Reader) *Parser {
+	p := Parser{
+		l: lex.NewLexer(r),
+	}
+	return &p
+}
+
+// ReadElem parses and returns the next element from the input. It returns
+// io.EOF when the lexer yields a nil token.
+func (p *Parser) ReadElem() (types.Elem, error) {
+	tok := p.l.Next()
+	return p.parseToken(tok)
+}
+
+// parseToken converts tok into an element, reading further tokens from the
+// lexer when tok opens a list or a hash. A nil token yields io.EOF.
+func (p *Parser) parseToken(tok *lex.Token) (types.Elem, error) {
+	if tok == nil {
+		return nil, io.EOF
+	}
+
+	switch tok.Type {
+	case lex.Err:
+		return nil, tok.AsError()
+	case lex.BareString:
+		return parseBareString(tok), nil
+	case lex.QuotedString:
+		return parseQuotedString(tok)
+	case lex.Open:
+		series, err := p.readUntil(closers[tok.Val])
+		if err != nil {
+			return nil, err
+		}
+
+		switch tok.Val {
+		case "(":
+			return seq.NewList(series...), nil
+		case "{":
+			if len(series)%2 != 0 {
+				return nil, fmt.Errorf("hash must have even number of elements")
+			}
+			kvs := make([]*seq.KV, 0, len(series)/2)
+			for i := 0; i < len(series); i += 2 {
+				kvs = append(kvs, &seq.KV{series[i], series[i+1]})
+			}
+			return seq.NewHashMap(kvs...), nil
+		}
+
+		// Any other opener (e.g. "[") has no element type here. Report an
+		// error rather than panicking on what may be ordinary input.
+		return nil, fmt.Errorf("unsupported opening token %q", tok.Val)
+
+	default:
+		return nil, fmt.Errorf("Unexpected %q", tok.Val)
+	}
+}
+
+// readUntil parses elements from the lexer until it reaches a Close token. It
+// returns the elements if that token's value equals closer, and an error if it
+// does not, if the input ends first, or if an element fails to parse.
+func (p *Parser) readUntil(closer string) ([]types.Elem, error) {
+	series := make([]types.Elem, 0, 4)
+	for {
+		tok := p.l.Next()
+		switch err := tok.AsError(); err {
+		case nil:
+			// Not an error token; it is handled below.
+		case io.EOF:
+			return nil, fmt.Errorf("Unexpected EOF")
+		default:
+			return nil, err
+		}
+
+		if tok.Type != lex.Close {
+			e, err := p.parseToken(tok)
+			if err != nil {
+				return nil, err
+			}
+			series = append(series, e)
+			continue
+		}
+
+		if tok.Val != closer {
+			return nil, fmt.Errorf("Unexpected %q", tok.Val)
+		}
+
+		return series, nil
+	}
+}
diff --git a/parse/parse_test.go b/parse/parse_test.go
index e3f6767..aa4f6a4 100644
--- a/parse/parse_test.go
+++ b/parse/parse_test.go
@@ -2,4 +2,11 @@ package parse
 
 import (
 	. "testing"
+
+	//"github.com/mediocregopher/ginger/seq"
+	//"github.com/mediocregopher/ginger/types"
 )
+
+func TestParseBareString(t *T) {
+	t.Fatal()
+}