Implemented lexer
parent 0248a11285
commit 2919bcaa77

rust/Cargo.lock (generated, 14 lines changed)

@@ -11,11 +11,19 @@ dependencies = [
  "typenum",
 ]
 
+[[package]]
+name = "char_reader"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37a59b22dec21ca7d6c173bd543eeab4cd2f36cf21f039a4134905034c87ed3a"
+
 [[package]]
 name = "ginger"
 version = "0.1.0"
 dependencies = [
+ "char_reader",
  "im-rc",
+ "unicode_categories",
 ]
 
 [[package]]

@@ -63,6 +71,12 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
 
+[[package]]
+name = "unicode_categories"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+
 [[package]]
 name = "version_check"
 version = "0.9.4"

@@ -7,3 +7,5 @@ edition = "2021"
 
 [dependencies]
 im-rc = "15.1.0"
+char_reader = "0.1.1"
+unicode_categories = "0.1.1"

rust/src/gg.rs (new file, 10 lines)

@@ -0,0 +1,10 @@
+pub mod lexer;
+
+use super::graph::Graph;
+
+#[derive(Clone, Eq, Hash, PartialEq, Debug)]
+pub enum Value<'a>{
+    Name(&'a str),
+    Number(i64),
+    Graph(&'a Graph<Value<'a>, Value<'a>>),
+}
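
To show how the new Value type can be consumed, here is a small illustrative sketch; the describe helper and its output strings are hypothetical and not part of this commit.

use ginger::gg::Value;

// Illustrative helper (not part of this commit): render a Value for display.
fn describe(v: &Value<'_>) -> String {
    match v {
        Value::Name(name) => format!("name {}", name),
        Value::Number(n) => format!("number {}", n),
        Value::Graph(_) => String::from("a nested graph"),
    }
}

fn main() {
    println!("{}", describe(&Value::Name("foo")));
    println!("{}", describe(&Value::Number(-42)));
}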

rust/src/gg/lexer.rs (new file, 160 lines)

@@ -0,0 +1,160 @@
+use std::fmt;
+use std::io::{self, Read, BufReader};
+use unicode_categories::UnicodeCategories;
+
+use char_reader::CharReader;
+
+pub struct Location {
+    pub row: i64,
+    pub col: i64,
+}
+
+impl fmt::Display for Location {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}:{}", self.row, self.col)
+    }
+}
+
+pub enum Error {
+    Tokenizing(&'static str, Location),
+    IO(io::Error),
+}
+
+impl From<io::Error> for Error{
+    fn from(e: io::Error) -> Self {
+        Error::IO(e)
+    }
+}
+
+pub enum TokenKind {
+    Name,
+    Number,
+    Punctuation,
+}
+
+pub struct Token {
+    pub kind: TokenKind,
+    pub value: String,
+    pub location: Location,
+}
+
+pub struct Lexer<R: Read> {
+    r: CharReader<BufReader<R>>,
+    buf: String,
+
+    prev_char: char,
+    prev_loc: Location,
+}
+
+impl<R: Read> Lexer<R>{
+
+    fn next_loc(&self) -> Location {
+
+        if self.prev_char == '\n' {
+            return Location{
+                row: self.prev_loc.row + 1,
+                col: 0
+            };
+        }
+
+        return Location{
+            row: self.prev_loc.row,
+            col: self.prev_loc.col + 1,
+        }
+    }
+
+    fn discard(&mut self) {
+
+        self.prev_char = self.r.next_char().
+            expect("discard should only get called after peek").
+            expect("discard should only get called after peek");
+
+        self.prev_loc = self.next_loc();
+    }
+
+    fn peek_a_bool(&mut self) -> Result<(char, bool), Error> {
+        if let Some(c) = self.r.peek_char()? {
+            Ok((c, true))
+        } else {
+            Ok(('0', false))
+        }
+    }
+
+    fn discard_while(&mut self, pred: impl Fn(char) -> bool) -> Result<(), Error> {
+
+        loop {
+            let (c, ok) = self.peek_a_bool()?;
+            if !ok || !pred(c) {
+                return Ok(());
+            }
+
+            self.discard();
+        }
+    }
+
+    fn collect_token(
+        &mut self,
+        kind: TokenKind,
+        pred: impl Fn(char) -> bool,
+    ) -> Result<Option<Token>, Error> {
+
+        let loc = self.next_loc();
+        self.buf.truncate(0);
+
+        loop {
+
+            let (c, ok) = self.peek_a_bool()?;
+
+            if !ok || !pred(c) {
+                return Ok(Some(Token{
+                    kind: kind,
+                    value: self.buf.clone(),
+                    location: loc,
+                }))
+            }
+
+            self.buf.push(c);
+            self.discard();
+        }
+    }
+
+    fn is_number(c: char) -> bool {
+        c == '-' || ('0' <= c && c <= '9')
+    }
+
+    pub fn next(&mut self) -> Result<Option<Token>, Error> {
+
+        loop {
+
+            let (c, ok) = self.peek_a_bool()?;
+            if !ok {
+                return Ok(None);
+
+            } else if c == '*' {
+                self.discard_while(|c| c != '\n')?;
+                // the terminating newline will be dealt with in the next loop
+
+            } else if c.is_letter() {
+                return self.collect_token(
+                    TokenKind::Name,
+                    |c| c.is_letter() || c.is_number() || c.is_mark() || c == '-',
+                );
+
+            } else if Self::is_number(c) {
+                return self.collect_token(TokenKind::Number, Self::is_number);
+
+            } else if c.is_punctuation() {
+                return self.collect_token(
+                    TokenKind::Punctuation,
+                    |c| c.is_punctuation() || c.is_symbol(),
+                );
+
+            } else if c.is_ascii_whitespace() {
+                self.discard_while(|c| c.is_ascii_whitespace())?;
+
+            } else {
+                return Err(Error::Tokenizing("unexpected character", self.next_loc()));
+            }
+        }
+    }
+}
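
For a quick sanity check of the lexer above, here is a hypothetical test sketch. It would sit inside rust/src/gg/lexer.rs itself (a child tests module can reach the private Lexer fields), CharReader::new is assumed to be the constructor exposed by char_reader 0.1.1, and the starting prev_char/prev_loc values are likewise assumptions; none of this is part of the commit.

// Hypothetical test module for rust/src/gg/lexer.rs (not part of this commit).
#[cfg(test)]
mod tests {
    use super::*;
    use char_reader::CharReader;
    use std::io::BufReader;

    #[test]
    fn skips_comments_and_whitespace() {
        let input = "* a comment\nfoo 42 bar;\n";
        let mut lexer = Lexer {
            // Assumption: char_reader 0.1.1 provides a `CharReader::new` constructor.
            r: CharReader::new(BufReader::new(input.as_bytes())),
            buf: String::new(),
            // Assumed starting state: a virtual newline before the input, so
            // next_loc() reports the first token on a fresh row.
            prev_char: '\n',
            prev_loc: Location { row: 0, col: 0 },
        };

        let mut values = Vec::new();
        while let Ok(Some(token)) = lexer.next() {
            values.push(token.value);
        }

        // The `*` comment line and all whitespace are discarded; names, numbers,
        // and punctuation come back as separate tokens.
        assert_eq!(values, vec!["foo", "42", "bar", ";"]);
    }
}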

@@ -1,3 +1,2 @@
-mod graph;
-
-pub use graph::Graph;
+pub mod graph;
+pub mod gg;

@@ -1,4 +1,4 @@
-use ginger::Graph;
+use ginger::graph::Graph;
 
 fn main() {