285 lines
5.6 KiB
Go
285 lines
5.6 KiB
Go
package gg
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// LexerError is returned by Lexer when an unexpected error occurs parsing a
|
|
// stream of LexerTokens.
|
|
type LexerError struct {
|
|
Err error
|
|
Row, Col int
|
|
}
|
|
|
|
func (e *LexerError) Error() string {
|
|
return fmt.Sprintf("%d: %d: %s", e.Col, e.Row, e.Err.Error())
|
|
}
|
|
|
|
func (e *LexerError) Unwrap() error {
|
|
return e.Err
|
|
}
|
|
|
|
// LexerTokenKind enumerates the different kinds of LexerToken there can be.
|
|
type LexerTokenKind string
|
|
|
|
// Enumeration of LexerTokenKinds.
|
|
const (
|
|
LexerTokenKindName LexerTokenKind = "name"
|
|
LexerTokenKindNumber LexerTokenKind = "number"
|
|
LexerTokenKindPunctuation LexerTokenKind = "punctuation"
|
|
)
|
|
|
|
// LexerToken describes a lexigraphical token which is used when deserializing
|
|
// Graphs.
|
|
type LexerToken struct {
|
|
Kind LexerTokenKind
|
|
Value string // never empty string
|
|
|
|
Row, Col int
|
|
}
|
|
|
|
// Lexer is used to parse a string stream into a sequence of tokens which can
|
|
// then be parsed by a Parser.
|
|
type Lexer interface {
|
|
|
|
// Next will return a LexerToken or a LexerError. io.EOF (wrapped in a
|
|
// LexerError) is returned if the stream being read from is finished.
|
|
Next() (LexerToken, error)
|
|
}
|
|
|
|
type lexer struct {
|
|
r *bufio.Reader
|
|
stringBuilder *strings.Builder
|
|
err *LexerError
|
|
|
|
// these fields are only needed to keep track of the current "cursor"
|
|
// position when reading.
|
|
lastRow, lastCol int
|
|
prevRune rune
|
|
}
|
|
|
|
// NewLexer wraps the io.Reader in a Lexer, which will read the io.Reader as a
|
|
// sequence of utf-8 characters and parse it into a sequence of LexerTokens.
|
|
func NewLexer(r io.Reader) Lexer {
|
|
return &lexer{
|
|
r: bufio.NewReader(r),
|
|
lastRow: 0,
|
|
lastCol: -1,
|
|
stringBuilder: new(strings.Builder),
|
|
}
|
|
}
|
|
|
|
// nextRowCol returns the row and column number which the next rune in the
|
|
// stream would be at.
|
|
func (l *lexer) nextRowCol() (int, int) {
|
|
|
|
if l.prevRune == '\n' {
|
|
return l.lastRow + 1, 0
|
|
}
|
|
|
|
return l.lastRow, l.lastCol + 1
|
|
}
|
|
|
|
func (l *lexer) fmtErr(err error) *LexerError {
|
|
|
|
row, col := l.nextRowCol()
|
|
|
|
return &LexerError{
|
|
Err: err,
|
|
Row: row,
|
|
Col: col,
|
|
}
|
|
}
|
|
|
|
func (l *lexer) fmtErrf(str string, args ...interface{}) *LexerError {
|
|
return l.fmtErr(fmt.Errorf(str, args...))
|
|
}
|
|
|
|
// discardRune must _always_ be called only after peekRune.
|
|
func (l *lexer) discardRune() {
|
|
|
|
r, _, err := l.r.ReadRune()
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
l.lastRow, l.lastCol = l.nextRowCol()
|
|
l.prevRune = r
|
|
}
|
|
|
|
func (l *lexer) peekRune() (rune, error) {
|
|
|
|
r, _, err := l.r.ReadRune()
|
|
|
|
if err != nil {
|
|
return '0', err
|
|
|
|
} else if err := l.r.UnreadRune(); err != nil {
|
|
|
|
// since the most recent operation on the bufio.Reader was a ReadRune,
|
|
// UnreadRune should never return an error
|
|
panic(err)
|
|
}
|
|
|
|
return r, nil
|
|
}
|
|
|
|
// readWhile reads runes until the given predicate returns false, and returns a
|
|
// LexerToken of the given kind whose Value is comprised of all runes which
|
|
// returned true.
|
|
//
|
|
// If an error is encountered then both the token (or what's been parsed of it
|
|
// so far) and the error are returned.
|
|
func (l *lexer) readWhile(
|
|
kind LexerTokenKind, pred func(rune) bool,
|
|
) (
|
|
LexerToken, *LexerError,
|
|
) {
|
|
|
|
row, col := l.nextRowCol()
|
|
|
|
l.stringBuilder.Reset()
|
|
|
|
var lexErr *LexerError
|
|
|
|
for {
|
|
|
|
r, err := l.peekRune()
|
|
|
|
if err != nil {
|
|
lexErr = l.fmtErrf("peeking next character: %w", err)
|
|
break
|
|
|
|
} else if !pred(r) {
|
|
break
|
|
}
|
|
|
|
l.stringBuilder.WriteRune(r)
|
|
|
|
l.discardRune()
|
|
}
|
|
|
|
return LexerToken{
|
|
Kind: kind,
|
|
Value: l.stringBuilder.String(),
|
|
Row: row, Col: col,
|
|
}, lexErr
|
|
}
|
|
|
|
// we only support base-10 integers at the moment.
|
|
func isNumber(r rune) bool {
|
|
return r == '-' || ('0' <= r && r <= '9')
|
|
}
|
|
|
|
// next can return a token, an error, or both. If an error is returned then no
|
|
// further calls to next should occur.
|
|
func (l *lexer) next() (LexerToken, *LexerError) {
|
|
|
|
for {
|
|
|
|
r, err := l.peekRune()
|
|
|
|
if err != nil {
|
|
return LexerToken{}, l.fmtErrf("peeking next character: %w", err)
|
|
}
|
|
|
|
switch {
|
|
|
|
case r == '*': // comment
|
|
|
|
// comments are everything up until a newline
|
|
_, err := l.readWhile("", func(r rune) bool {
|
|
return r != '\n'
|
|
})
|
|
|
|
if err != nil {
|
|
return LexerToken{}, err
|
|
}
|
|
|
|
// terminating newline is deliberately not discarded. Loop and find
|
|
// the next token (which will be that newline).
|
|
|
|
case r == '\n':
|
|
// newlines are considered punctuation, not whitespace
|
|
|
|
l.discardRune()
|
|
|
|
return LexerToken{
|
|
Kind: LexerTokenKindPunctuation,
|
|
Value: string(r),
|
|
Row: l.lastRow,
|
|
Col: l.lastCol,
|
|
}, nil
|
|
|
|
case r == '"' || r == '`':
|
|
|
|
// reserve double-quote and backtick for string parsing.
|
|
l.discardRune()
|
|
return LexerToken{}, l.fmtErrf("string parsing not yet implemented")
|
|
|
|
case unicode.IsLetter(r):
|
|
// letters denote the start of a name
|
|
|
|
return l.readWhile(LexerTokenKindName, func(r rune) bool {
|
|
|
|
if unicode.In(r, unicode.Letter, unicode.Number, unicode.Mark) {
|
|
return true
|
|
}
|
|
|
|
if r == '-' {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
})
|
|
|
|
case isNumber(r):
|
|
return l.readWhile(LexerTokenKindNumber, isNumber)
|
|
|
|
case unicode.IsPunct(r) || unicode.IsSymbol(r):
|
|
// symbols are also considered punctuation
|
|
|
|
l.discardRune()
|
|
|
|
return LexerToken{
|
|
Kind: LexerTokenKindPunctuation,
|
|
Value: string(r),
|
|
Row: l.lastRow,
|
|
Col: l.lastCol,
|
|
}, nil
|
|
|
|
case unicode.IsSpace(r):
|
|
l.discardRune()
|
|
|
|
default:
|
|
return LexerToken{}, l.fmtErrf("unexpected character %q", r)
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
func (l *lexer) Next() (LexerToken, error) {
|
|
|
|
if l.err != nil {
|
|
return LexerToken{}, l.err
|
|
}
|
|
|
|
tok, err := l.next()
|
|
|
|
if err != nil {
|
|
|
|
l.err = err
|
|
|
|
if tok.Kind == "" {
|
|
return LexerToken{}, l.err
|
|
}
|
|
}
|
|
|
|
return tok, nil
|
|
}
|