From 92f93ff076b87afcefe4747ad4db997732c6ab65 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 27 Dec 2022 17:12:28 +0100 Subject: [PATCH] Implement Decoder The syntax has ultimately remained unchanged, except for the new ability to have tuples as edge values, changing the tuple value separator to `,`, and allowing for the eliding of trailing `;` and `,`. --- rust/src/gg.rs | 75 ++++++-- rust/src/gg/decoder.rs | 380 +++++++++++++++++++++++++++++++++++++++++ rust/src/gg/gg.bnf | 29 ++-- rust/src/gg/lexer.rs | 42 ++++- 4 files changed, 490 insertions(+), 36 deletions(-) create mode 100644 rust/src/gg/decoder.rs diff --git a/rust/src/gg.rs b/rust/src/gg.rs index 110e124..320bf6f 100644 --- a/rust/src/gg.rs +++ b/rust/src/gg.rs @@ -1,12 +1,17 @@ -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use im_rc::{HashSet}; -pub mod lexer; +mod lexer; +mod decoder; -#[derive(Clone, Eq, Hash, PartialEq)] +pub use decoder::Decoder; + +#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(test, derive(Debug))] pub struct OpenEdge(Value, Value); // edge, src -#[derive(Clone, Eq, Hash, PartialEq)] +#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(test, derive(Debug))] pub enum Value{ Name(String), Number(i64), @@ -14,13 +19,10 @@ pub enum Value{ Graph(Graph), } -#[derive(Clone, Eq, Hash, PartialEq)] -struct Edge { - dst_val: V, - src_val: V, -} +pub const ZERO_TUPLE: Value = Value::Tuple(vec![]); -#[derive(Clone, Eq, Hash, PartialEq)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(test, derive(Debug))] pub struct Graph { edges: HashSet<(Value, OpenEdge)>, // dst, src } @@ -31,9 +33,58 @@ impl Graph { Graph{edges: HashSet::new()} } - pub fn with(&self, dst_val: Value, src_edge: OpenEdge) -> Self { + pub fn with(&self, dst_val: Value, edge_val: Value, src_val: Value) -> Self { Graph{ - edges: self.edges.update((dst_val, src_edge)), + edges: self.edges.update((dst_val, OpenEdge(edge_val, src_val))), } } } + +// The 
implementation of hash for im_rc::HashSet does not sort the entries. +impl Hash for Graph { + fn hash(&self, state: &mut H) { + let mut edges = Vec::from_iter(&self.edges); + edges.sort(); + edges.iter().for_each(|edge| edge.hash(state)); + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + fn number(i: i64) -> Value { + Value::Number(i) + } + + #[test] + fn equality() { + + let g1 = Graph::new() + .with(number(0), number(1), number(2)) + .with(number(3), number(4), number(5)); + + let g2 = Graph::new() + .with(number(3), number(4), number(5)) + .with(number(0), number(1), number(2)); + + assert_eq!(g1, g2); + } + + #[test] + fn deep_equality() { + + let g1 = Graph::new().with(number(-2), ZERO_TUPLE, Value::Graph(Graph::new() + .with(number(0), number(1), number(2)) + .with(number(3), number(4), number(5)), + )); + + let g2 = Graph::new().with(number(-2), ZERO_TUPLE, Value::Graph(Graph::new() + .with(number(3), number(4), number(5)) + .with(number(0), number(1), number(2)), + )); + + assert_eq!(g1, g2); + } +} diff --git a/rust/src/gg/decoder.rs b/rust/src/gg/decoder.rs new file mode 100644 index 0000000..173fda7 --- /dev/null +++ b/rust/src/gg/decoder.rs @@ -0,0 +1,380 @@ +use std::io::{self, Read}; + +use super::{Graph, Value, OpenEdge, ZERO_TUPLE}; +use super::lexer::{self, Lexer, Token, TokenKind, Location}; + +// In order to make sense of this file, check out the accompanying gg.bnf, which describes the +// grammar in BNF notation. Each method in the Decoder maps more or less exactly to a state within +// the BNF. 
+ +#[cfg_attr(test, derive(Debug))] +pub enum Error { + Decoding(String, Location), + IO(io::Error), +} + +impl From for Error { + fn from(e: lexer::Error) -> Self { + match e { + lexer::Error::Tokenizing(s, loc) => Error::Decoding(s, loc), + lexer::Error::IO(e) => Error::IO(e) + } + } +} + +static OUTER_GRAPH_TERM: Token = Token{ + kind: TokenKind::End, + value: String::new(), +}; + +pub struct Decoder { + lexer: Lexer, +} + +impl Decoder { + + pub fn new(r: R) -> Decoder { + Decoder{ + lexer: Lexer::new(r), + } + } + + pub fn decode_undelimited(&mut self) -> Result { + self.outer_graph(Graph::new()) + } + + fn exp_punct(&mut self, v: &'static str) -> Result<(), Error> { + + match self.lexer.next()? { + (Token{kind: TokenKind::Punctuation, value: v2}, _) if v == v2 => Ok(()), + (tok, loc) => Err(Error::Decoding( + format!("expected '{}', found: {}", v, tok), + loc, + )), + } + } + + fn generic_graph(&mut self, term_tok: &Token, g: Graph) -> Result { + + match self.lexer.next()? { + + (tok, _) if tok == *term_tok => Ok(g), + + (Token{kind: TokenKind::Name, value: name}, _) => { + self.exp_punct("=")?; + let open_edge = self.generic_graph_tail(term_tok, ZERO_TUPLE)?; + self.generic_graph(term_tok, g.with( + Value::Name(name), + open_edge.0, + open_edge.1, + )) + } + + (tok, loc) => Err(Error::Decoding( + format!("expected name or {}, found: {}", term_tok, tok), + loc, + )), + } + } + + fn generic_graph_tail(&mut self, term_tok: &Token, edge_val: Value) -> Result { + + let val = self.value()?; + + match self.lexer.next()? 
{ + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == ";" => + Ok(OpenEdge(edge_val, val)), + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == "<" => + + if edge_val == ZERO_TUPLE { + self.generic_graph_tail(term_tok, val) + } else { + Ok(OpenEdge(edge_val, Value::Tuple(vec![ + self.generic_graph_tail(term_tok, val)?, + ]))) + }, + + (tok, loc) => { + self.lexer.push_next(tok, loc); + Ok(OpenEdge(edge_val, val)) + }, + } + } + + fn outer_graph(&mut self, g: Graph) -> Result { + self.generic_graph(&OUTER_GRAPH_TERM, g) + } + + fn graph(&mut self, g: Graph) -> Result { + + let term_tok = Token{ + kind: TokenKind::Punctuation, + value: String::from("}"), + }; + + self.generic_graph(&term_tok, g) + } + + fn tuple(&mut self, tuple_vec: &mut Vec) -> Result<(), Error> { + + loop { + match self.lexer.next()? { + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == ")" => + return Ok(()), + + (tok, loc) => { + self.lexer.push_next(tok, loc); + tuple_vec.push(self.tuple_tail(ZERO_TUPLE)?); + }, + } + } + } + + fn tuple_tail(&mut self, edge_val: Value) -> Result { + + let val = self.value()?; + + match self.lexer.next()? { + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == "," => + Ok(OpenEdge(edge_val, val)), + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == "<" => + + if edge_val == ZERO_TUPLE { + self.tuple_tail(val) + } else { + Ok(OpenEdge(edge_val, Value::Tuple(vec![ + self.tuple_tail(val)?, + ]))) + }, + + (tok, loc) => { + self.lexer.push_next(tok, loc); + Ok(OpenEdge(edge_val, val)) + }, + + } + } + + fn value(&mut self) -> Result { + + match self.lexer.next()? 
{ + + (Token{kind: TokenKind::Name, value: v}, _) => + Ok(Value::Name(v)), + + (Token{kind: TokenKind::Number, value: v}, loc) => + match v.parse::() { + Ok(n) => Ok(Value::Number(n)), + Err(e) => Err(Error::Decoding( + format!("parsing {:#?} as integer: {}", v, e), + loc, + )), + }, + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == "(" => { + let mut vec = Vec::new(); + self.tuple(&mut vec)?; + Ok(Value::Tuple(vec)) + }, + + (Token{kind: TokenKind::Punctuation, value: v}, _) if v == "{" => + Ok(Value::Graph(self.graph(Graph::new())?)), + + (tok, loc) => Err(Error::Decoding( + format!("expected name, number, '(', or '{{', found: {}", tok), + loc, + )), + } + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn decoder() { + + fn name(s: &'static str) -> Value { + Value::Name(s.to_string()) + } + + fn number(i: i64) -> Value { + Value::Number(i) + } + + struct Test { + input: &'static str, + exp: Graph, + } + + let tests = vec!{ + Test{ + input: "", + exp: Graph::new(), + }, + Test{ + input: "out = 1", + exp: Graph::new(). + with(name("out"), ZERO_TUPLE, number(1)), + }, + Test{ + input: "out = 1;", + exp: Graph::new(). + with(name("out"), ZERO_TUPLE, number(1)), + }, + Test{ + input: "out = incr < 1", + exp: Graph::new(). + with(name("out"), name("incr"), number(1)), + }, + Test{ + input: "out = incr < 1;", + exp: Graph::new(). 
+ with(name("out"), name("incr"), number(1)), + }, + Test{ + input: "out = a < b < 1", + exp: Graph::new().with( + name("out"), + name("a"), + Value::Tuple(vec![OpenEdge(name("b"), number(1))]), + ), + }, + Test{ + input: "out = a < b < 1;", + exp: Graph::new().with( + name("out"), + name("a"), + Value::Tuple(vec![OpenEdge(name("b"), number(1))]), + ), + }, + Test{ + input: "out = a < b < (1, c < 2, d < e < 3)", + exp: Graph::new().with( + name("out"), + name("a"), + Value::Tuple(vec![ + OpenEdge(name("b"), Value::Tuple(vec![ + OpenEdge(ZERO_TUPLE, number(1)), + OpenEdge(name("c"), number(2)), + OpenEdge(name("d"), Value::Tuple(vec![ + OpenEdge(name("e"), number(3)), + ])), + ])), + ]), + ), + }, + Test{ + input: "out = (c < 2,);", + exp: Graph::new().with( + name("out"), + ZERO_TUPLE, + Value::Tuple(vec![ + OpenEdge(name("c"), number(2)), + ]), + ), + }, + Test{ + input: "out = (1, c < 2) < 3;", + exp: Graph::new().with( + name("out"), + Value::Tuple(vec![ + OpenEdge(ZERO_TUPLE, number(1)), + OpenEdge(name("c"), number(2)), + ]), + number(3), + ), + }, + Test{ + input: "out = a < b < (1, c < (d < 2, 3))", + exp: Graph::new().with( + name("out"), + name("a"), + Value::Tuple(vec![ + OpenEdge(name("b"), Value::Tuple(vec![ + OpenEdge(ZERO_TUPLE, number(1)), + OpenEdge(name("c"), Value::Tuple(vec![ + OpenEdge(name("d"), number(2)), + OpenEdge(ZERO_TUPLE, number(3)), + ])), + ])), + ]), + ), + }, + Test{ + input: "out = { a = 1; b = 2 < 3; c = 4 < 5 < 6 }", + exp: Graph::new().with( + name("out"), + ZERO_TUPLE, + Value::Graph(Graph::new() + .with(name("a"), ZERO_TUPLE, number(1)) + .with(name("b"), number(2), number(3)) + .with(name("c"), number(4), Value::Tuple(vec![ + OpenEdge(number(5), number(6)), + ])), + ), + ), + }, + Test{ + input: "out = { a = 1; };", + exp: Graph::new().with( + name("out"), + ZERO_TUPLE, + Value::Graph(Graph::new() + .with(name("a"), ZERO_TUPLE, number(1)), + ), + ), + }, + Test{ + input: "out = { a = 1; } < 2", + exp: Graph::new().with( + 
name("out"), + Value::Graph(Graph::new() + .with(name("a"), ZERO_TUPLE, number(1)), + ), + number(2), + ), + }, + Test{ + input: "out = { a = 1; } < 2; foo = 5 < 6", + exp: Graph::new() + .with( + name("out"), + Value::Graph(Graph::new() + .with(name("a"), ZERO_TUPLE, number(1)), + ), + number(2), + ) + .with(name("foo"), number(5), number(6)), + }, + Test{ + input: "out = { a = 1 } < 2; foo = 5 < 6;", + exp: Graph::new() + .with( + name("out"), + Value::Graph(Graph::new() + .with(name("a"), ZERO_TUPLE, number(1)), + ), + number(2), + ) + .with(name("foo"), number(5), number(6)), + }, + }; + + for test in tests { + println!("INPUT: {:#?}", test.input); + + let mut d = Decoder::new(test.input.as_bytes()); + let got = d.decode_undelimited().expect("no errors expected"); + assert_eq!(test.exp, got); + } + + } +} diff --git a/rust/src/gg/gg.bnf b/rust/src/gg/gg.bnf index 3321945..76c3621 100644 --- a/rust/src/gg/gg.bnf +++ b/rust/src/gg/gg.bnf @@ -6,22 +6,19 @@ | ::= | "" - ::= | | | + ::= | | "(" | "{" - ::= "(" - ::= ")" | - ::= ")" - | "," - | "<" + ::= ")" | + ::= "" + | "," + | "<" - ::= "{" - ::= "}" | "<" - ::= "}" - | ";" - | "<" + ::= "}" | "=" + ::= "" + | ";" + | "<" - ::= - ::= | "<" - ::= - | ";" - | "<" + ::= | "=" + ::= "" + | ";" + | "<" diff --git a/rust/src/gg/lexer.rs b/rust/src/gg/lexer.rs index eb222d6..592020b 100644 --- a/rust/src/gg/lexer.rs +++ b/rust/src/gg/lexer.rs @@ -4,7 +4,8 @@ use unicode_categories::UnicodeCategories; use char_reader::CharReader; -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, PartialEq)] +#[cfg_attr(test, derive(Debug))] pub struct Location { pub row: i64, pub col: i64, @@ -16,9 +17,9 @@ impl fmt::Display for Location { } } -#[derive(Debug)] +#[cfg_attr(test, derive(Debug))] pub enum Error { - Tokenizing(&'static str, Location), + Tokenizing(String, Location), IO(io::Error), } @@ -28,7 +29,8 @@ impl From for Error { } } -#[derive(Debug, PartialEq)] +#[derive(PartialEq, Clone)] +#[cfg_attr(test, 
derive(Debug))] pub enum TokenKind { Name, Number, @@ -36,16 +38,28 @@ pub enum TokenKind { End, } -#[derive(Debug, PartialEq)] +#[derive(PartialEq, Clone)] +#[cfg_attr(test, derive(Debug))] pub struct Token { pub kind: TokenKind, pub value: String, } +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.kind { + TokenKind::Name => write!(f, "{:#?}", self.value), + TokenKind::Number => write!(f, "{}", self.value), + TokenKind::Punctuation => write!(f, "'{}'", self.value), + TokenKind::End => write!(f, ""), + } + } +} + pub struct Lexer { r: CharReader>, buf: String, - + next_stack: Vec<(Token, Location)>, next_loc: Location, } @@ -55,6 +69,7 @@ impl Lexer{ Lexer{ r: CharReader::new(BufReader::new(r)), buf: String::new(), + next_stack: Vec::new(), next_loc: Location{ row: 0, col: 0, @@ -131,8 +146,16 @@ impl Lexer{ c == '-' || ('0' <= c && c <= '9') } + pub fn push_next(&mut self, token: Token, loc: Location) { + self.next_stack.push((token, loc)) + } + pub fn next(&mut self) -> Result<(Token, Location), Error> { + if let Some(r) = self.next_stack.pop() { + return Ok(r); + } + loop { let (c, ok) = self.peek_a_bool()?; @@ -169,7 +192,10 @@ impl Lexer{ self.discard_while(|c| c.is_ascii_whitespace())?; } else { - return Err(Error::Tokenizing("invalid character", self.next_loc)); + return Err(Error::Tokenizing( + format!("unexpected character: {:#?}", c).to_string(), + self.next_loc, + )); } } } @@ -280,7 +306,7 @@ mod tests { } } - assert_eq!(*res.as_slice(), *test.exp) + assert_eq!(*test.exp, *res.as_slice()) } } }