Implement Decoder

The syntax has ultimately remained unchanged, except for the new ability
to have tuples as edge values, changing the tuple value separator to
`,`, and allowing for the eliding of trailing `;` and `,`.
This commit is contained in:
Brian Picciano 2022-12-27 17:12:28 +01:00
parent e06b20b604
commit 92f93ff076
4 changed files with 490 additions and 36 deletions

View File

@ -1,12 +1,17 @@
use std::hash::Hash;
use std::hash::{Hash, Hasher};
use im_rc::{HashSet};
pub mod lexer;
mod lexer;
mod decoder;
#[derive(Clone, Eq, Hash, PartialEq)]
pub use decoder::Decoder;
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Debug))]
pub struct OpenEdge(Value, Value); // edge, src
#[derive(Clone, Eq, Hash, PartialEq)]
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Debug))]
pub enum Value{
Name(String),
Number(i64),
@ -14,13 +19,10 @@ pub enum Value{
Graph(Graph),
}
#[derive(Clone, Eq, Hash, PartialEq)]
struct Edge<V> {
dst_val: V,
src_val: V,
}
pub const ZERO_TUPLE: Value = Value::Tuple(vec![]);
#[derive(Clone, Eq, Hash, PartialEq)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Debug))]
pub struct Graph {
edges: HashSet<(Value, OpenEdge)>, // dst, src
}
@ -31,9 +33,58 @@ impl Graph {
Graph{edges: HashSet::new()}
}
pub fn with(&self, dst_val: Value, src_edge: OpenEdge) -> Self {
pub fn with(&self, dst_val: Value, edge_val: Value, src_val: Value) -> Self {
Graph{
edges: self.edges.update((dst_val, src_edge)),
edges: self.edges.update((dst_val, OpenEdge(edge_val, src_val))),
}
}
}
// The implementation of hash for im_rc::HashSet does not sort the entries, so the edges are
// collected and sorted here before being fed to the hasher one by one.
impl Hash for Graph {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let mut ordered: Vec<_> = self.edges.iter().collect();
        ordered.sort();
        for edge in ordered {
            edge.hash(state);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand for building a numeric Value.
    fn number(i: i64) -> Value {
        Value::Number(i)
    }

    // Two graphs built from the same edges compare equal regardless of insertion order.
    #[test]
    fn equality() {
        let forward = Graph::new()
            .with(number(0), number(1), number(2))
            .with(number(3), number(4), number(5));

        let reversed = Graph::new()
            .with(number(3), number(4), number(5))
            .with(number(0), number(1), number(2));

        assert_eq!(forward, reversed);
    }

    // Same as `equality`, but with the order-insensitive graphs nested as edge source values.
    #[test]
    fn deep_equality() {
        let inner_forward = Graph::new()
            .with(number(0), number(1), number(2))
            .with(number(3), number(4), number(5));
        let outer_forward = Graph::new()
            .with(number(-2), ZERO_TUPLE, Value::Graph(inner_forward));

        let inner_reversed = Graph::new()
            .with(number(3), number(4), number(5))
            .with(number(0), number(1), number(2));
        let outer_reversed = Graph::new()
            .with(number(-2), ZERO_TUPLE, Value::Graph(inner_reversed));

        assert_eq!(outer_forward, outer_reversed);
    }
}

380
rust/src/gg/decoder.rs Normal file
View File

@ -0,0 +1,380 @@
use std::io::{self, Read};
use super::{Graph, Value, OpenEdge, ZERO_TUPLE};
use super::lexer::{self, Lexer, Token, TokenKind, Location};
// In order to make sense of this file, check out the accompanying gg.bnf, which describes the
// grammar in BNF notation. Each method in the Decoder maps more or less exactly to a state within
// the BNF.
// Error describes the ways in which decoding can fail.
#[cfg_attr(test, derive(Debug))]
pub enum Error {
// The input could not be decoded. Carries a human-readable message and the Location
// associated with the offending token (or with the lexer's tokenizing error).
Decoding(String, Location),
// An io::Error, e.g. one surfaced by the lexer as lexer::Error::IO.
IO(io::Error),
}
impl From<lexer::Error> for Error {
fn from(e: lexer::Error) -> Self {
match e {
lexer::Error::Tokenizing(s, loc) => Error::Decoding(s, loc),
lexer::Error::IO(e) => Error::IO(e)
}
}
}
// The token which terminates the outermost (undelimited) graph; outer_graph passes it to
// generic_graph as the terminator. generic_graph compares whole tokens for equality, so this
// only matches if the lexer emits End with an empty value -- NOTE(review): confirm in lexer.
static OUTER_GRAPH_TERM: Token = Token{
kind: TokenKind::End,
value: String::new(),
};
// Decoder reads tokens from a Lexer wrapping the given reader and decodes them into a Graph.
pub struct Decoder<R: Read> {
// Source of tokens; also receives look-ahead tokens which get pushed back via push_next.
lexer: Lexer<R>,
}
impl<R: Read> Decoder<R> {
// Constructs a Decoder which lexes its input from `r`.
pub fn new(r: R) -> Decoder<R> {
    let lexer = Lexer::new(r);
    Decoder{lexer}
}
// Decodes the input as an outer graph, i.e. a sequence of graph entries which is terminated
// by the end of the input rather than by a closing '}'. Starts from an empty Graph.
pub fn decode_undelimited(&mut self) -> Result<Graph, Error> {
    let empty = Graph::new();
    self.outer_graph(empty)
}
fn exp_punct(&mut self, v: &'static str) -> Result<(), Error> {
match self.lexer.next()? {
(Token{kind: TokenKind::Punctuation, value: v2}, _) if v == v2 => Ok(()),
(tok, loc) => Err(Error::Decoding(
format!("expected '{}', found: {}", v, tok),
loc,
)),
}
}
fn generic_graph(&mut self, term_tok: &Token, g: Graph) -> Result<Graph, Error> {
match self.lexer.next()? {
(tok, _) if tok == *term_tok => Ok(g),
(Token{kind: TokenKind::Name, value: name}, _) => {
self.exp_punct("=")?;
let open_edge = self.generic_graph_tail(term_tok, ZERO_TUPLE)?;
self.generic_graph(term_tok, g.with(
Value::Name(name),
open_edge.0,
open_edge.1,
))
}
(tok, loc) => Err(Error::Decoding(
format!("expected name or {}, found: {}", term_tok, tok),
loc,
)),
}
}
// Parses the right-hand side of a graph entry: a value, optionally followed by `<` and a
// further tail. A trailing ';' is consumed if present; any other following token is pushed
// back onto the lexer for the caller to deal with.
//
// `edge_val` is the edge value collected so far (ZERO_TUPLE if none). The first value
// followed by `<` becomes the edge value; if an edge value is already present, the remainder
// of the chain is nested as a single-element tuple in the source position.
fn generic_graph_tail(&mut self, term_tok: &Token, edge_val: Value) -> Result<OpenEdge, Error> {
    let val = self.value()?;
    let (tok, loc) = self.lexer.next()?;

    if matches!(tok.kind, TokenKind::Punctuation) {
        match tok.value.as_str() {
            ";" => return Ok(OpenEdge(edge_val, val)),
            "<" => {
                if edge_val == ZERO_TUPLE {
                    return self.generic_graph_tail(term_tok, val);
                }
                let nested = self.generic_graph_tail(term_tok, val)?;
                return Ok(OpenEdge(edge_val, Value::Tuple(vec![nested])));
            },
            _ => {},
        }
    }

    // Not part of this entry: hand the token back so the caller sees it next.
    self.lexer.push_next(tok, loc);
    Ok(OpenEdge(edge_val, val))
}
// Parses graph entries on top of `g` until the End token (OUTER_GRAPH_TERM) is reached.
fn outer_graph(&mut self, g: Graph) -> Result<Graph, Error> {
    let terminator = &OUTER_GRAPH_TERM;
    self.generic_graph(terminator, g)
}
// Parses graph entries on top of `g` until a closing '}' is consumed. The opening '{' is
// expected to have been consumed already (value() does so before calling this).
fn graph(&mut self, g: Graph) -> Result<Graph, Error> {
    let closing_brace = Token{
        kind: TokenKind::Punctuation,
        value: "}".to_string(),
    };
    self.generic_graph(&closing_brace, g)
}
// Parses tuple elements, appending each to `tuple_vec`, until a closing ')' is consumed. The
// opening '(' is expected to have been consumed already (value() does so before calling this).
fn tuple(&mut self, tuple_vec: &mut Vec<OpenEdge>) -> Result<(), Error> {
    loop {
        let (tok, loc) = self.lexer.next()?;
        if matches!(tok.kind, TokenKind::Punctuation) && tok.value == ")" {
            return Ok(());
        }

        // The token begins an element, so give it back before parsing that element.
        self.lexer.push_next(tok, loc);
        let elem = self.tuple_tail(ZERO_TUPLE)?;
        tuple_vec.push(elem);
    }
}
// Parses a single tuple element: a value, optionally followed by `<` and a further tail. A
// trailing ',' is consumed if present; any other following token is pushed back onto the
// lexer for the caller to deal with.
//
// `edge_val` is the edge value collected so far (ZERO_TUPLE if none). The first value
// followed by `<` becomes the edge value; if an edge value is already present, the remainder
// of the chain is nested as a single-element tuple in the source position.
fn tuple_tail(&mut self, edge_val: Value) -> Result<OpenEdge, Error> {
    let val = self.value()?;
    let (tok, loc) = self.lexer.next()?;

    if matches!(tok.kind, TokenKind::Punctuation) {
        match tok.value.as_str() {
            "," => return Ok(OpenEdge(edge_val, val)),
            "<" => {
                if edge_val == ZERO_TUPLE {
                    return self.tuple_tail(val);
                }
                let nested = self.tuple_tail(val)?;
                return Ok(OpenEdge(edge_val, Value::Tuple(vec![nested])));
            },
            _ => {},
        }
    }

    // Not part of this element: hand the token back so the caller sees it next.
    self.lexer.push_next(tok, loc);
    Ok(OpenEdge(edge_val, val))
}
fn value(&mut self) -> Result<Value, Error> {
match self.lexer.next()? {
(Token{kind: TokenKind::Name, value: v}, _) =>
Ok(Value::Name(v)),
(Token{kind: TokenKind::Number, value: v}, loc) =>
match v.parse::<i64>() {
Ok(n) => Ok(Value::Number(n)),
Err(e) => Err(Error::Decoding(
format!("parsing {:#?} as integer: {}", v, e),
loc,
)),
},
(Token{kind: TokenKind::Punctuation, value: v}, _) if v == "(" => {
let mut vec = Vec::new();
self.tuple(&mut vec)?;
Ok(Value::Tuple(vec))
},
(Token{kind: TokenKind::Punctuation, value: v}, _) if v == "{" =>
Ok(Value::Graph(self.graph(Graph::new())?)),
(tok, loc) => Err(Error::Decoding(
format!("expected name, number, '(', or '{{', found: {}", tok),
loc,
)),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
// Table-driven test: every case's input is decoded using Decoder::decode_undelimited, and the
// result is compared against the expected Graph. All cases are expected to decode without
// error.
#[test]
fn decoder() {
// Shorthand for building a Value::Name.
fn name(s: &'static str) -> Value {
Value::Name(s.to_string())
}
// Shorthand for building a Value::Number.
fn number(i: i64) -> Value {
Value::Number(i)
}
// A single test case: the source text to decode and the graph it should decode to.
struct Test {
input: &'static str,
exp: Graph,
}
let tests = vec!{
// Empty input decodes to an empty graph.
Test{
input: "",
exp: Graph::new(),
},
// A bare value has ZERO_TUPLE as its edge value; the trailing ';' is optional.
Test{
input: "out = 1",
exp: Graph::new().
with(name("out"), ZERO_TUPLE, number(1)),
},
Test{
input: "out = 1;",
exp: Graph::new().
with(name("out"), ZERO_TUPLE, number(1)),
},
// `edge < src` sets the edge value.
Test{
input: "out = incr < 1",
exp: Graph::new().
with(name("out"), name("incr"), number(1)),
},
Test{
input: "out = incr < 1;",
exp: Graph::new().
with(name("out"), name("incr"), number(1)),
},
// Chained `<` nests everything after the first edge value as a single-element tuple.
Test{
input: "out = a < b < 1",
exp: Graph::new().with(
name("out"),
name("a"),
Value::Tuple(vec![OpenEdge(name("b"), number(1))]),
),
},
Test{
input: "out = a < b < 1;",
exp: Graph::new().with(
name("out"),
name("a"),
Value::Tuple(vec![OpenEdge(name("b"), number(1))]),
),
},
// Tuples: elements are separated by ',' and may themselves carry edge values.
Test{
input: "out = a < b < (1, c < 2, d < e < 3)",
exp: Graph::new().with(
name("out"),
name("a"),
Value::Tuple(vec![
OpenEdge(name("b"), Value::Tuple(vec![
OpenEdge(ZERO_TUPLE, number(1)),
OpenEdge(name("c"), number(2)),
OpenEdge(name("d"), Value::Tuple(vec![
OpenEdge(name("e"), number(3)),
])),
])),
]),
),
},
// A trailing ',' inside a tuple is allowed.
Test{
input: "out = (c < 2,);",
exp: Graph::new().with(
name("out"),
ZERO_TUPLE,
Value::Tuple(vec![
OpenEdge(name("c"), number(2)),
]),
),
},
// A tuple can be used as an edge value.
Test{
input: "out = (1, c < 2) < 3;",
exp: Graph::new().with(
name("out"),
Value::Tuple(vec![
OpenEdge(ZERO_TUPLE, number(1)),
OpenEdge(name("c"), number(2)),
]),
number(3),
),
},
Test{
input: "out = a < b < (1, c < (d < 2, 3))",
exp: Graph::new().with(
name("out"),
name("a"),
Value::Tuple(vec![
OpenEdge(name("b"), Value::Tuple(vec![
OpenEdge(ZERO_TUPLE, number(1)),
OpenEdge(name("c"), Value::Tuple(vec![
OpenEdge(name("d"), number(2)),
OpenEdge(ZERO_TUPLE, number(3)),
])),
])),
]),
),
},
// Nested graphs: delimited by '{' and '}', with entries separated by ';'.
Test{
input: "out = { a = 1; b = 2 < 3; c = 4 < 5 < 6 }",
exp: Graph::new().with(
name("out"),
ZERO_TUPLE,
Value::Graph(Graph::new()
.with(name("a"), ZERO_TUPLE, number(1))
.with(name("b"), number(2), number(3))
.with(name("c"), number(4), Value::Tuple(vec![
OpenEdge(number(5), number(6)),
])),
),
),
},
Test{
input: "out = { a = 1; };",
exp: Graph::new().with(
name("out"),
ZERO_TUPLE,
Value::Graph(Graph::new()
.with(name("a"), ZERO_TUPLE, number(1)),
),
),
},
// A graph can be used as an edge value.
Test{
input: "out = { a = 1; } < 2",
exp: Graph::new().with(
name("out"),
Value::Graph(Graph::new()
.with(name("a"), ZERO_TUPLE, number(1)),
),
number(2),
),
},
// Multiple top-level entries, with and without trailing ';'.
Test{
input: "out = { a = 1; } < 2; foo = 5 < 6",
exp: Graph::new()
.with(
name("out"),
Value::Graph(Graph::new()
.with(name("a"), ZERO_TUPLE, number(1)),
),
number(2),
)
.with(name("foo"), number(5), number(6)),
},
Test{
input: "out = { a = 1 } < 2; foo = 5 < 6;",
exp: Graph::new()
.with(
name("out"),
Value::Graph(Graph::new()
.with(name("a"), ZERO_TUPLE, number(1)),
),
number(2),
)
.with(name("foo"), number(5), number(6)),
},
};
for test in tests {
// Printed so that a failing case can be identified from the test output.
println!("INPUT: {:#?}", test.input);
let mut d = Decoder::new(test.input.as_bytes());
let got = d.decode_undelimited().expect("no errors expected");
assert_eq!(test.exp, got);
}
}
}

View File

@ -6,22 +6,19 @@
| <ascii-number> <number-tail>
<number-tail> ::= <ascii-number> <number-tail> | ""
<value> ::= <name> | <number> | <tuple> | <graph>
<value> ::= <name> | <number> | "(" <tuple> | "{" <graph>
<tuple> ::= "(" <tuple-edge>
<tuple-edge> ::= ")" | <tuple-edge-incomplete>
<tuple-edge-incomplete> ::= <value> ")"
| <value> "," <tuple-edge>
| <value> "<" <tuple-edge-incomplete>
<tuple> ::= ")" | <tuple-tail> <tuple>
<tuple-tail> ::= <value> ""
| <value> ","
| <value> "<" <tuple-tail>
<graph> ::= "{" <graph-branch>
<graph-branch> ::= "}" | <name> "<" <graph-edge-incomplete>
<graph-edge-incomplete> ::= <value> "}"
| <value> ";" <graph-branch>
| <value> "<" <graph-edge-incomplete>
<graph> ::= "}" | <name> "=" <graph-tail> <graph>
<graph-tail> ::= <value> ""
| <value> ";"
| <value> "<" <graph-tail>
<outer-graph> ::= <outer-graph-branch>
<outer-graph-branch> ::= <end> | <name> "<" <outer-graph-edge-incomplete>
<outer-graph-edge-incomplete> ::= <value> <end>
| <value> ";" <outer-graph-branch>
| <value> "<" <outer-graph-edge-incomplete>
<outer-graph> ::= <end> | <name> "=" <outer-graph-tail> <outer-graph>
<outer-graph-tail> ::= <value> ""
| <value> ";"
| <value> "<" <outer-graph-tail>

View File

@ -4,7 +4,8 @@ use unicode_categories::UnicodeCategories;
use char_reader::CharReader;
#[derive(Copy, Clone, Debug, PartialEq)]
#[derive(Copy, Clone, PartialEq)]
#[cfg_attr(test, derive(Debug))]
pub struct Location {
pub row: i64,
pub col: i64,
@ -16,9 +17,9 @@ impl fmt::Display for Location {
}
}
#[derive(Debug)]
#[cfg_attr(test, derive(Debug))]
pub enum Error {
Tokenizing(&'static str, Location),
Tokenizing(String, Location),
IO(io::Error),
}
@ -28,7 +29,8 @@ impl From<io::Error> for Error {
}
}
#[derive(Debug, PartialEq)]
#[derive(PartialEq, Clone)]
#[cfg_attr(test, derive(Debug))]
pub enum TokenKind {
Name,
Number,
@ -36,16 +38,28 @@ pub enum TokenKind {
End,
}
#[derive(Debug, PartialEq)]
#[derive(PartialEq, Clone)]
#[cfg_attr(test, derive(Debug))]
pub struct Token {
pub kind: TokenKind,
pub value: String,
}
// Renders a token for use in error messages: names in quoted debug form, numbers verbatim,
// punctuation wrapped in single quotes, and the end of input as "<end>".
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = &self.value;
        match self.kind {
            TokenKind::End => write!(f, "<end>"),
            TokenKind::Punctuation => write!(f, "'{}'", text),
            TokenKind::Number => write!(f, "{}", text),
            TokenKind::Name => write!(f, "{:#?}", text),
        }
    }
}
pub struct Lexer<R: Read> {
r: CharReader<BufReader<R>>,
buf: String,
next_stack: Vec<(Token, Location)>,
next_loc: Location,
}
@ -55,6 +69,7 @@ impl<R: Read> Lexer<R>{
Lexer{
r: CharReader::new(BufReader::new(r)),
buf: String::new(),
next_stack: Vec::new(),
next_loc: Location{
row: 0,
col: 0,
@ -131,8 +146,16 @@ impl<R: Read> Lexer<R>{
c == '-' || ('0' <= c && c <= '9')
}
// Pushes a token (and its location) back onto the lexer. next() pops from this stack before
// reading any further input, so the most recently pushed token is returned first.
pub fn push_next(&mut self, token: Token, loc: Location) {
    let entry = (token, loc);
    self.next_stack.push(entry);
}
pub fn next(&mut self) -> Result<(Token, Location), Error> {
if let Some(r) = self.next_stack.pop() {
return Ok(r);
}
loop {
let (c, ok) = self.peek_a_bool()?;
@ -169,7 +192,10 @@ impl<R: Read> Lexer<R>{
self.discard_while(|c| c.is_ascii_whitespace())?;
} else {
return Err(Error::Tokenizing("invalid character", self.next_loc));
return Err(Error::Tokenizing(
format!("unexpected character: {:#?}", c).to_string(),
self.next_loc,
));
}
}
}
@ -280,7 +306,7 @@ mod tests {
}
}
assert_eq!(*res.as_slice(), *test.exp)
assert_eq!(*test.exp, *res.as_slice())
}
}
}