ginger/gg/grammar/symbol.go

307 lines
7.9 KiB
Go

package grammar
import (
"errors"
"fmt"
"io"
"strings"
)
// ErrNoMatch is the sentinel error a Symbol's Decode method returns to signal
// that the upcoming data on the Reader is not accepted by that Symbol. See the
// documentation of Symbol's Decode method for more details.
var ErrNoMatch = errors.New("no match")
// Symbol represents a symbol in the grammar which, when decoded, produces a
// value of type T. A Symbol is expected to be stateless, and is usually
// constructed from other Symbols using functions in this package.
type Symbol[T any] interface {
fmt.Stringer // Used when generating errors related to this Symbol, e.g. "number"
// Decode reads and parses a value represented by this Symbol off the
// Reader.
//
// This may return ErrNoMatch to indicate that the upcoming data on the
// Reader is rejected by this Symbol. In this case the Symbol should leave
// the Reader in the same state it was passed.
//
// Any other non-nil error indicates that decoding failed outright.
Decode(Reader) (T, error)
}
// symbol is the Symbol implementation used by the constructors in this
// package. It pairs a Stringer, which supplies the String method, with the
// function which implements Decode.
type symbol[T any] struct {
fmt.Stringer
decodeFn func(Reader) (T, error) // invoked by Decode
}
// Decode implements the Symbol interface by handing the Reader to the decodeFn
// this symbol was constructed with and returning its results unchanged.
func (s *symbol[T]) Decode(r Reader) (T, error) {
	decoded, err := s.decodeFn(r)
	return decoded, err
}
// SymbolPtr wraps a Symbol in such a way as to make lazily initializing a
// Symbol variable possible. This allows for recursion amongst different
// Symbols: a *SymbolPtr can be handed to other Symbols before its embedded
// Symbol field has been assigned.
//
// Example:
//
//	a := new(SymbolPtr[Located[rune]])
//	b := new(SymbolPtr[Located[rune]])
//	a.Symbol = FirstOf(Rune('a'), b)
//	b.Symbol = FirstOf(Rune('b'), a)
type SymbolPtr[T any] struct {
Symbol[T]
}
// named returns a Symbol which decodes exactly as sym does, but whose String
// method is supplied by the given Stringer.
func named[T any](stringer fmt.Stringer, sym Symbol[T]) Symbol[T] {
	wrapped := &symbol[T]{
		Stringer: stringer,
		decodeFn: sym.Decode,
	}
	return wrapped
}
// Named returns a Symbol which behaves like sym when decoding, but which
// reports the given name from its String method.
func Named[T any](name string, sym Symbol[T]) Symbol[T] {
	label := Stringer{S: name}
	return named(label, sym)
}
// RuneFunc returns a Symbol, described by name, which matches and produces the
// next rune on the Reader if fn returns true for it.
//
// The Symbol does not match if the Reader is at io.EOF or if fn rejects the
// rune. Any other error from the Reader is returned as-is.
func RuneFunc(name string, fn func(rune) bool) Symbol[Located[rune]] {
	decode := func(rr Reader) (Located[rune], error) {
		var none Located[rune]

		got, err := rr.ReadRune()
		switch {
		case errors.Is(err, io.EOF):
			// Running out of input is a non-match rather than a failure.
			return none, ErrNoMatch
		case err != nil:
			return none, err
		}

		if fn(got.Value) {
			return got, nil
		}

		// Hand the rejected rune back to the Reader, per the Symbol contract
		// for a non-match.
		rr.UnreadRune(got)
		return none, ErrNoMatch
	}

	return &symbol[Located[rune]]{
		Stringer: Stringer{S: name},
		decodeFn: decode,
	}
}
// Rune matches and produces the given rune.
//
// The returned Symbol is named using the rune's single-quoted, escaped Go
// literal form (e.g. 'a', '\n', '\''), so that control characters and quotes
// stay readable when the name is embedded in an error message.
func Rune(r rune) Symbol[Located[rune]] {
	return RuneFunc(
		// %q rather than '%c': the latter would emit newlines, tabs, quotes,
		// etc... verbatim into error messages.
		fmt.Sprintf("%q", r),
		func(r2 rune) bool { return r == r2 },
	)
}
// StringFromRunes converts the slice of runes produced by the given Symbol
// into a single string, located at the position of the first rune.
//
// The slice produced by sym must not be empty, otherwise the returned Symbol
// panics when decoding. StringFromRunes does not match if sym does not match.
func StringFromRunes(sym Symbol[[]Located[rune]]) Symbol[Located[string]] {
	join := func(runes []Located[rune]) Located[string] {
		if len(runes) == 0 {
			panic("StringFromRunes used on empty set of runes")
		}

		chars := make([]rune, 0, len(runes))
		for _, lr := range runes {
			chars = append(chars, lr.Value)
		}

		return Located[string]{runes[0].Location, string(chars)}
	}

	return Mapping(sym, join)
}
// Mapping returns a Symbol which decodes a value of type Ta using sym, and
// produces the result of passing that value through fn. The returned Symbol
// shares sym's String method.
//
// If sym does not match, or returns any other error, then Mapping returns that
// same error and fn is not called.
func Mapping[Ta, Tb any](
	sym Symbol[Ta], fn func(Ta) Tb,
) Symbol[Tb] {
	decode := func(rr Reader) (Tb, error) {
		va, err := sym.Decode(rr)
		if err == nil {
			return fn(va), nil
		}

		var zero Tb
		return zero, err
	}

	return &symbol[Tb]{Stringer: sym, decodeFn: decode}
}
// OneOrMore repeatedly decodes sym, collecting the produced values in order,
// until sym reports that it does not match. If sym did not match even once
// then OneOrMore does not match either.
//
// Decoding only stops once sym returns ErrNoMatch or some other error, so sym
// must eventually stop matching. Any error other than ErrNoMatch aborts
// decoding and is returned as-is.
func OneOrMore[T any](sym Symbol[T]) Symbol[[]T] {
	describe := func() string {
		return fmt.Sprintf("one or more %v", sym)
	}

	decode := func(rr Reader) ([]T, error) {
		var found []T
		for {
			v, err := sym.Decode(rr)
			if errors.Is(err, ErrNoMatch) {
				if len(found) == 0 {
					return nil, ErrNoMatch
				}
				return found, nil
			}
			if err != nil {
				return nil, err
			}
			found = append(found, v)
		}
	}

	return &symbol[[]T]{
		Stringer: Stringer{F: describe},
		decodeFn: decode,
	}
}
// ZeroOrMore repeatedly decodes sym, collecting the produced values in order,
// until sym reports that it does not match. If sym did not match even once
// then an empty (nil) slice is produced; ZeroOrMore itself never returns
// ErrNoMatch.
//
// Decoding only stops once sym returns ErrNoMatch or some other error, so sym
// must eventually stop matching. Any error other than ErrNoMatch aborts
// decoding and is returned as-is.
func ZeroOrMore[T any](sym Symbol[T]) Symbol[[]T] {
	describe := func() string {
		return fmt.Sprintf("zero or more %v", sym)
	}

	decode := func(rr Reader) ([]T, error) {
		var found []T
		for {
			v, err := sym.Decode(rr)
			if errors.Is(err, ErrNoMatch) {
				return found, nil
			}
			if err != nil {
				return nil, err
			}
			found = append(found, v)
		}
	}

	return &symbol[[]T]{
		Stringer: Stringer{F: describe},
		decodeFn: decode,
	}
}
// firstOf returns a Symbol, described by the given Stringer, which tries each
// of syms in order and produces the value of the first one which matches.
//
// A Symbol returning ErrNoMatch is skipped; any other error is returned
// immediately. If no Symbol matches then ErrNoMatch is returned.
func firstOf[T any](stringer fmt.Stringer, syms ...Symbol[T]) Symbol[T] {
	decode := func(rr Reader) (T, error) {
		var zero T
		for i := range syms {
			v, err := syms[i].Decode(rr)
			switch {
			case err == nil:
				return v, nil
			case errors.Is(err, ErrNoMatch):
				// Try the next alternative.
				continue
			default:
				return zero, err
			}
		}
		return zero, ErrNoMatch
	}

	return &symbol[T]{Stringer: stringer, decodeFn: decode}
}
// FirstOf tries each of the given Symbols in order, and produces the value of
// the first one which matches. FirstOf does not match if none of the given
// Symbols match.
//
// The returned Symbol describes itself as the given Symbols' descriptions
// joined by " or ".
func FirstOf[T any](syms ...Symbol[T]) Symbol[T] {
	describe := func() string {
		names := make([]string, 0, len(syms))
		for _, s := range syms {
			names = append(names, s.String())
		}
		return strings.Join(names, " or ")
	}

	return firstOf(Stringer{F: describe}, syms...)
}
// Reduction produces a value of type Tc by first decoding a value from symA,
// then one from symB, and then passing both values through the given function.
// The returned Symbol shares symA's String method.
//
// If symA does not match then Reduction does not match. If symA matches but
// symB does not also match then Reduction produces a LocatedError. Any other
// error from either Symbol is returned as-is.
func Reduction[Ta, Tb, Tc any](
	symA Symbol[Ta],
	symB Symbol[Tb],
	fn func(Ta, Tb) Tc,
) Symbol[Tc] {
	decode := func(rr Reader) (Tc, error) {
		var zero Tc

		va, errA := symA.Decode(rr)
		if errA != nil {
			return zero, errA
		}

		vb, errB := symB.Decode(rr)
		switch {
		case errors.Is(errB, ErrNoMatch):
			// symA has already matched, so a missing symB is reported as an
			// error at the Reader's next location rather than as a non-match.
			return zero, rr.NextLocation().errf("expected %v", symB)
		case errB != nil:
			return zero, errB
		}

		return fn(va, vb), nil
	}

	return &symbol[Tc]{Stringer: symA, decodeFn: decode}
}
// Prefixed matches on prefixSym followed by sym, producing only the value
// produced by sym; the value of prefixSym is discarded. The returned Symbol
// uses prefixSym's String method.
//
// If prefixSym does not match then Prefixed does not match. If prefixSym
// matches but sym does not also match then Prefixed produces a LocatedError.
func Prefixed[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] {
	keepSecond := func(_ Ta, v Tb) Tb { return v }
	return named(prefixSym, Reduction(prefixSym, sym, keepSecond))
}
// PrefixDiscarded matches on prefixSym followed by sym, producing only the
// value produced by sym. Unlike Prefixed, if sym does not match then
// PrefixDiscarded simply does not match, rather than producing a LocatedError.
// The returned Symbol uses sym's String method.
//
// NOTE PrefixDiscarded does not fully honor the contract of Symbol. If
// prefixSym matches, but sym does not, then only sym will restore Reader to its
// prior state; prefixSym cannot return whatever data it read back onto the
// Reader. Therefore ErrNoMatch can be returned without Reader being fully back
// in its original state. In practice this isn't a big deal, given the common
// use-cases of PrefixDiscarded, but it may prove tricky.
func PrefixDiscarded[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] {
	decode := func(rr Reader) (Tb, error) {
		_, err := prefixSym.Decode(rr)
		if err == nil {
			// The prefix matched; whatever sym reports is the final result.
			return sym.Decode(rr)
		}

		var zero Tb
		return zero, err
	}

	return &symbol[Tb]{Stringer: sym, decodeFn: decode}
}
// Suffixed matches on sym followed by suffixSym, producing only the value
// produced by sym; the value of suffixSym is discarded. The returned Symbol
// uses sym's String method.
//
// If sym does not match then Suffixed does not match. If sym matches but
// suffixSym does not also match then Suffixed produces a LocatedError.
func Suffixed[Ta, Tb any](sym Symbol[Ta], suffixSym Symbol[Tb]) Symbol[Ta] {
	keepFirst := func(v Ta, _ Tb) Ta { return v }
	return named(sym, Reduction(sym, suffixSym, keepFirst))
}
// Discard matches whenever the given Symbol does, but throws away the value
// that Symbol produces and produces an empty struct in its place.
func Discard[T any](sym Symbol[T]) Symbol[struct{}] {
	drop := func(T) struct{} { return struct{}{} }
	return Mapping(sym, drop)
}