package grammar import ( "errors" "fmt" "io" "strings" ) // ErrNoMatch is used by Symbol's Decode method, see that method's docs for more // details. var ErrNoMatch = errors.New("no match") // Symbol represents a symbol in the grammar. A Symbol is expected to be // stateless, and is usually constructed from other Symbols using functions in // this package. type Symbol[T any] interface { fmt.Stringer // Used when generating errors related to this Symbol, e.g. "number" // Decode reads and parses a value represented by this Symbol off the // Reader. // // This may return ErrNoMatch to indicate that the upcoming data on the // Reader is rejected by this Symbol. In this case the Symbol should leave // the Reader in the same state it was passed. Decode(Reader) (T, error) } type symbol[T any] struct { fmt.Stringer decodeFn func(Reader) (T, error) } func (s *symbol[T]) Decode(r Reader) (T, error) { return s.decodeFn(r) } // SymbolPtr wraps a Symbol in a such a way as to make lazily initializing a // Symbol variable possible. This allows for recursion amongst different // Symbols. // // Example: // // a := new(SymbolPtr) // b := new(SymbolPtr) // a.Symbol = FirstOf(Rune('a'), b) // b.Symbol = FirstOf(Rune('b'), a) type SymbolPtr[T any] struct { Symbol[T] } func named[T any](stringer fmt.Stringer, sym Symbol[T]) Symbol[T] { return &symbol[T]{ stringer, sym.Decode, } } // Named wraps the given Symbol such that its String method returns the given // name. func Named[T any](name string, sym Symbol[T]) Symbol[T] { return named(Stringer{S: name}, sym) } // RuneFunc matches and produces any rune for which the given function returns // true. 
func RuneFunc(name string, fn func(rune) bool) Symbol[Located[rune]] { return &symbol[Located[rune]]{ Stringer{S: name}, func(rr Reader) (Located[rune], error) { var zero Located[rune] r, err := rr.ReadRune() if errors.Is(err, io.EOF) { return zero, ErrNoMatch } else if err != nil { return zero, err } if !fn(r.Value) { rr.UnreadRune(r) return zero, ErrNoMatch } return r, nil }, } } // Rune matches and produces the given rune. func Rune(r rune) Symbol[Located[rune]] { return RuneFunc( fmt.Sprintf("'%c'", r), func(r2 rune) bool { return r == r2 }, ) } // StringFromRunes produces a string from the slice of runes produced by the // given Symbol. The slice must not be empty. StringFromRunes does not match if // the given Symbol does not match. func StringFromRunes(sym Symbol[[]Located[rune]]) Symbol[Located[string]] { return Mapping(sym, func(runes []Located[rune]) Located[string] { if len(runes) == 0 { panic("StringFromRunes used on empty set of runes") } str := make([]rune, len(runes)) for i := range runes { str[i] = runes[i].Value } return Located[string]{runes[0].Location, string(str)} }) } // Mapping produces a value of type Tb by decoding a value from the given // Symbol and passing it through the given mapping function. If the given Symbol // doesn't match then neither does Map. func Mapping[Ta, Tb any]( sym Symbol[Ta], fn func(Ta) Tb, ) Symbol[Tb] { return &symbol[Tb]{ sym, func(rr Reader) (Tb, error) { var zero Tb va, err := sym.Decode(rr) if err != nil { return zero, err } return fn(va), nil }, } } // OneOrMore will produce as many of the given Symbol's value as can be found // sequentially, up until a non-matching value is encountered. If no matches are // found then OneOrMore does not match. 
func OneOrMore[T any](sym Symbol[T]) Symbol[[]T] { return &symbol[[]T]{ Stringer{F: func() string { return fmt.Sprintf("one or more %v", sym) }}, func(rr Reader) ([]T, error) { var vv []T for { v, err := sym.Decode(rr) if errors.Is(err, ErrNoMatch) { break } else if err != nil { return nil, err } vv = append(vv, v) } if len(vv) == 0 { return nil, ErrNoMatch } return vv, nil }, } } // ZeroOrMore will produce as many of the given Symbol's value as can be found // sequentially, up until a non-matching value is encountered. If no matches are // found then an empty slice is produced. func ZeroOrMore[T any](sym Symbol[T]) Symbol[[]T] { return &symbol[[]T]{ Stringer{F: func() string { return fmt.Sprintf("zero or more %v", sym) }}, func(rr Reader) ([]T, error) { var vv []T for { v, err := sym.Decode(rr) if errors.Is(err, ErrNoMatch) { break } else if err != nil { return nil, err } vv = append(vv, v) } return vv, nil }, } } func firstOf[T any](stringer fmt.Stringer, syms ...Symbol[T]) Symbol[T] { return &symbol[T]{ stringer, func(rr Reader) (T, error) { var zero T for _, sym := range syms { v, err := sym.Decode(rr) if errors.Is(err, ErrNoMatch) { continue } else if err != nil { return zero, err } return v, nil } return zero, ErrNoMatch }, } } // FirstOf matches and produces the value for the first Symbol in the list which // matches. FirstOf does not match if none of the given Symbols match. func FirstOf[T any](syms ...Symbol[T]) Symbol[T] { return firstOf( Stringer{F: func() string { descrs := make([]string, len(syms)) for i := range syms { descrs[i] = syms[i].String() } return strings.Join(descrs, " or ") }}, syms..., ) } // Reduction produces a value of type Tc by first reading a value from symA, // then symB, and then running those through the given function. // // If symA does not match then Reduction does not match. If symA matches but // symB does not then also match then Reduction produces a LocatedError. 
func Reduction[Ta, Tb, Tc any]( symA Symbol[Ta], symB Symbol[Tb], fn func(Ta, Tb) Tc, ) Symbol[Tc] { return &symbol[Tc]{ symA, func(rr Reader) (Tc, error) { var zero Tc va, err := symA.Decode(rr) if err != nil { return zero, err } vb, err := symB.Decode(rr) if errors.Is(err, ErrNoMatch) { return zero, rr.NextLocation().errf("expected %v", symB) } else if err != nil { return zero, err } return fn(va, vb), nil }, } } // Prefixed matches on prefixSym, discards its value, then produces the value // produced by sym. // // If prefixSym does not match then Prefixed does not match. If prefixSym // matches but sym does not also match then Prefixed produces a LocatedError. func Prefixed[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] { return named(prefixSym, Reduction(prefixSym, sym, func(_ Ta, b Tb) Tb { return b })) } // PrefixDiscarded is similar to Prefixed, except that if sym does not match // then PrefixDiscarded does not match, whereas Prefixed produces a LocatedError // in that case. // // NOTE PrefixDiscarded does not fully honor the contract of Symbol. If // prefixSym matches, but sym does not, then only sym will restore Reader to its // prior state; prefixSym cannot return whatever data it read back onto the // Reader. Therefore ErrNoMatch can be returned without Reader being fully back // in its original state. In practice this isn't a big deal, given the common // use-cases of PrefixDiscarded, but it may prove tricky. func PrefixDiscarded[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb] { return &symbol[Tb]{ sym, func(rr Reader) (Tb, error) { var zero Tb if _, err := prefixSym.Decode(rr); err != nil { return zero, err } return sym.Decode(rr) }, } } // Suffixed matchs on sym and then suffixSym, returning the value produced by // sym and discarding the one produced by suffixSym. // // If sym does not match then Suffixed does not match. If sym matches but // suffixSym does not also match then Suffixed produces a LocatedError. 
func Suffixed[Ta, Tb any](sym Symbol[Ta], suffixSym Symbol[Tb]) Symbol[Ta] {
	// Only the first of the two decoded values survives; the resulting Symbol
	// is named after sym.
	keepFirst := func(v Ta, _ Tb) Ta { return v }
	return named(sym, Reduction(sym, suffixSym, keepFirst))
}

// Discard matches if the given Symbol does, but discards the value it produces,
// producing an empty value instead.
func Discard[T any](sym Symbol[T]) Symbol[struct{}] {
	drop := func(T) struct{} { return struct{}{} }
	return Mapping(sym, drop)
}