295 lines
7.7 KiB
Go
295 lines
7.7 KiB
Go
|
package grammar
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"strings"
|
||
|
)
|
||
|
|
||
|
// ErrNoMatch is the sentinel error which a Symbol's Decode method may return
// to indicate that the upcoming data on the Reader is rejected by that Symbol.
// See the documentation of Symbol's Decode method for more details.
var ErrNoMatch = errors.New("no match")
|
||
|
|
||
|
// Symbol represents a symbol in the grammar. A Symbol is expected to be
|
||
|
// stateless, and is usually constructed from other Symbols using functions in
|
||
|
// this package.
|
||
|
type Symbol[T any] interface {
|
||
|
fmt.Stringer // Used when generating errors related to this Symbol, e.g. "number"
|
||
|
|
||
|
// Decode reads and parses a value represented by this Symbol off the
|
||
|
// Reader.
|
||
|
//
|
||
|
// This may return ErrNoMatch to indicate that the upcoming data on the
|
||
|
// Reader is rejected by this Symbol. In this case the Symbol should leave
|
||
|
// the Reader in the same state it was passed.
|
||
|
Decode(Reader) (T, error)
|
||
|
}
|
||
|
|
||
|
type symbol[T any] struct {
|
||
|
fmt.Stringer
|
||
|
decodeFn func(Reader) (T, error)
|
||
|
}
|
||
|
|
||
|
// Decode implements the Symbol interface by handing the Reader to the wrapped
// decodeFn and returning its results unchanged.
func (s *symbol[T]) Decode(r Reader) (T, error) {
	return s.decodeFn(r)
}
|
||
|
|
||
|
// SymbolPtr wraps a Symbol in a such a way as to make lazily initializing a
|
||
|
// Symbol variable possible. This allows for recursion amongst different
|
||
|
// Symbols.
|
||
|
//
|
||
|
// Example:
|
||
|
//
|
||
|
// a := new(SymbolPtr)
|
||
|
// b := new(SymbolPtr)
|
||
|
// a.Symbol = FirstOf(Rune('a'), b)
|
||
|
// b.Symbol = FirstOf(Rune('b'), a)
|
||
|
type SymbolPtr[T any] struct {
|
||
|
Symbol[T]
|
||
|
}
|
||
|
|
||
|
// RuneFunc matches and produces any rune for which the given function returns
|
||
|
// true.
|
||
|
func RuneFunc(stringer fmt.Stringer, fn func(rune) bool) Symbol[Located[rune]] {
|
||
|
return &symbol[Located[rune]]{
|
||
|
stringer,
|
||
|
func(rr Reader) (Located[rune], error) {
|
||
|
var zero Located[rune]
|
||
|
|
||
|
r, err := rr.ReadRune()
|
||
|
if errors.Is(err, io.EOF) {
|
||
|
return zero, ErrNoMatch
|
||
|
} else if err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
|
||
|
if !fn(r.Value) {
|
||
|
rr.UnreadRune(r)
|
||
|
return zero, ErrNoMatch
|
||
|
}
|
||
|
|
||
|
return r, nil
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Rune matches and produces the given rune.
|
||
|
func Rune(r rune) Symbol[Located[rune]] {
|
||
|
return RuneFunc(
|
||
|
Stringer{S: fmt.Sprintf("'%c'", r)},
|
||
|
func(r2 rune) bool { return r == r2 },
|
||
|
)
|
||
|
}
|
||
|
|
||
|
// StringFromRunes produces a string from the slice of runes produced by the
|
||
|
// given Symbol. The slice must not be empty. StringFromRunes does not match if
|
||
|
// the given Symbol does not match.
|
||
|
func StringFromRunes(sym Symbol[[]Located[rune]]) Symbol[Located[string]] {
|
||
|
return Mapping(sym, sym, func(runes []Located[rune]) Located[string] {
|
||
|
if len(runes) == 0 {
|
||
|
panic("StringFromRunes used on empty set of runes")
|
||
|
}
|
||
|
|
||
|
str := make([]rune, len(runes))
|
||
|
for i := range runes {
|
||
|
str[i] = runes[i].Value
|
||
|
}
|
||
|
return Located[string]{runes[0].Location, string(str)}
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// Mapping produces a value of type Tb by decoding a value from the given
|
||
|
// Symbol and passing it through the given mapping function. If the given Symbol
|
||
|
// doesn't match then neither does Map.
|
||
|
func Mapping[Ta, Tb any](
|
||
|
stringer fmt.Stringer, sym Symbol[Ta], fn func(Ta) Tb,
|
||
|
) Symbol[Tb] {
|
||
|
return &symbol[Tb]{
|
||
|
stringer,
|
||
|
func(rr Reader) (Tb, error) {
|
||
|
var zero Tb
|
||
|
va, err := sym.Decode(rr)
|
||
|
if err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
return fn(va), nil
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// OneOrMore will produce as many of the given Symbol's value as can be found
|
||
|
// sequentially, up until a non-matching value is encountered. If no matches are
|
||
|
// found then OneOrMore does not match.
|
||
|
func OneOrMore[T any](sym Symbol[T]) Symbol[[]T] {
|
||
|
return &symbol[[]T]{
|
||
|
Stringer{F: func() string {
|
||
|
return fmt.Sprintf("one or more %v", sym)
|
||
|
}},
|
||
|
func(rr Reader) ([]T, error) {
|
||
|
var vv []T
|
||
|
for {
|
||
|
v, err := sym.Decode(rr)
|
||
|
if errors.Is(err, ErrNoMatch) {
|
||
|
break
|
||
|
} else if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
vv = append(vv, v)
|
||
|
}
|
||
|
|
||
|
if len(vv) == 0 {
|
||
|
return nil, ErrNoMatch
|
||
|
}
|
||
|
|
||
|
return vv, nil
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// ZeroOrMore will produce as many of the given Symbol's value as can be found
|
||
|
// sequentially, up until a non-matching value is encountered. If no matches are
|
||
|
// found then an empty slice is produced.
|
||
|
func ZeroOrMore[T any](sym Symbol[T]) Symbol[[]T] {
|
||
|
return &symbol[[]T]{
|
||
|
Stringer{F: func() string {
|
||
|
return fmt.Sprintf("zero or more %v", sym)
|
||
|
}},
|
||
|
func(rr Reader) ([]T, error) {
|
||
|
var vv []T
|
||
|
for {
|
||
|
v, err := sym.Decode(rr)
|
||
|
if errors.Is(err, ErrNoMatch) {
|
||
|
break
|
||
|
} else if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
vv = append(vv, v)
|
||
|
}
|
||
|
|
||
|
return vv, nil
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func firstOf[T any](stringer fmt.Stringer, syms ...Symbol[T]) Symbol[T] {
|
||
|
return &symbol[T]{
|
||
|
stringer,
|
||
|
func(rr Reader) (T, error) {
|
||
|
var zero T
|
||
|
for _, sym := range syms {
|
||
|
v, err := sym.Decode(rr)
|
||
|
if errors.Is(err, ErrNoMatch) {
|
||
|
continue
|
||
|
} else if err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
|
||
|
return v, nil
|
||
|
}
|
||
|
|
||
|
return zero, ErrNoMatch
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// FirstOf matches and produces the value for the first Symbol in the list which
|
||
|
// matches. FirstOf does not match if none of the given Symbols match.
|
||
|
func FirstOf[T any](syms ...Symbol[T]) Symbol[T] {
|
||
|
return firstOf(
|
||
|
Stringer{F: func() string {
|
||
|
descrs := make([]string, len(syms))
|
||
|
for i := range syms {
|
||
|
descrs[i] = syms[i].String()
|
||
|
}
|
||
|
return strings.Join(descrs, " or ")
|
||
|
}},
|
||
|
syms...,
|
||
|
)
|
||
|
}
|
||
|
|
||
|
// Reduction produces a value of type Tc by first reading a value from symA,
|
||
|
// then symB, and then running those through the given function.
|
||
|
//
|
||
|
// If symA does not match then Reduction does not match. If symA matches but
|
||
|
// symB does not then also match then Reduction produces a LocatedError.
|
||
|
func Reduction[Ta, Tb, Tc any](
|
||
|
stringer fmt.Stringer,
|
||
|
symA Symbol[Ta],
|
||
|
symB Symbol[Tb],
|
||
|
fn func(Ta, Tb) Tc,
|
||
|
) Symbol[Tc] {
|
||
|
return &symbol[Tc]{
|
||
|
stringer,
|
||
|
func(rr Reader) (Tc, error) {
|
||
|
var zero Tc
|
||
|
|
||
|
va, err := symA.Decode(rr)
|
||
|
if err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
|
||
|
vb, err := symB.Decode(rr)
|
||
|
if errors.Is(err, ErrNoMatch) {
|
||
|
return zero, rr.NextLocation().errf("expected %v", symB)
|
||
|
} else if err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
|
||
|
return fn(va, vb), nil
|
||
|
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Prefixed matches on prefixSym, discards its value, then produces the value
|
||
|
// produced by sym.
|
||
|
//
|
||
|
// If prefixSym does not match then Prefixed does not match. If prefixSym
|
||
|
// matches but sym does not also match then Prefixed produces a LocatedError.
|
||
|
func Prefixed[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] {
|
||
|
return Reduction(prefixSym, prefixSym, sym, func(_ Ta, b Tb) Tb {
|
||
|
return b
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// PrefixDiscarded is similar to Prefixed, except that if sym does not match
|
||
|
// then PrefixDiscarded does not match, whereas Prefixed produces a LocatedError
|
||
|
// in that case.
|
||
|
//
|
||
|
// NOTE PrefixDiscarded does not fully honor the contract of Symbol. If
|
||
|
// prefixSym matches, but sym does not, then only sym will restore Reader to its
|
||
|
// prior state; prefixSym cannot return whatever data it read back onto the
|
||
|
// Reader. Therefore ErrNoMatch can be returned without Reader being fully back
|
||
|
// in its original state. In practice this isn't a big deal, given the common
|
||
|
// use-cases of PrefixDiscarded, but it may prove tricky.
|
||
|
func PrefixDiscarded[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] {
|
||
|
return &symbol[Tb]{
|
||
|
sym,
|
||
|
func(rr Reader) (Tb, error) {
|
||
|
var zero Tb
|
||
|
if _, err := prefixSym.Decode(rr); err != nil {
|
||
|
return zero, err
|
||
|
}
|
||
|
return sym.Decode(rr)
|
||
|
},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Suffixed matchs on sym and then suffixSym, returning the value produced by
|
||
|
// sym and discarding the one produced by suffixSym.
|
||
|
//
|
||
|
// If sym does not match then Suffixed does not match. If sym matches but
|
||
|
// suffixSym does not also match then Suffixed produces a LocatedError.
|
||
|
func Suffixed[Ta, Tb any](sym Symbol[Ta], suffixSym Symbol[Tb]) Symbol[Ta] {
|
||
|
return Reduction(sym, sym, suffixSym, func(a Ta, _ Tb) Ta {
|
||
|
return a
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// Discard matches if the given Symbol does, but discards the value it produces,
|
||
|
// producing an empty value instead.
|
||
|
func Discard[T any](sym Symbol[T]) Symbol[struct{}] {
|
||
|
return Mapping(sym, sym, func(T) struct{} { return struct{}{} })
|
||
|
}
|