Very basic tokenizer implementation
This commit is contained in:
parent
71f8fbd748
commit
d7314de57a
3 changed files with 149 additions and 11 deletions
142
src/lib.rs
142
src/lib.rs
|
@ -1,3 +1,6 @@
|
|||
use std::fmt;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Token {
|
||||
OpenParen,
|
||||
CloseParen,
|
||||
|
@ -6,16 +9,143 @@ pub enum Token {
|
|||
Integer(i32)
|
||||
}
|
||||
|
||||
pub trait Tokenizable {
|
||||
fn tokenize(&self) -> Vec<Token>;
|
||||
|
||||
/// Error reported when tokenizing fails, together with the source span it refers to.
///
/// The `Display` implementation prints a single position when the start and end of the
/// span are equal, and a `from … to …` range otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizerError {
    /// Line on which the offending span starts.
    line: u64,
    /// Column at which the offending span starts.
    column: u64,

    /// Line on which the offending span ends.
    line_end: u64,
    /// Column at which the offending span ends.
    column_end: u64,

    /// Human-readable description of what went wrong.
    message: String,
}
|
||||
|
||||
impl Tokenizable for String {
|
||||
fn tokenize(&self) -> Vec<Token> {
|
||||
let tokens = Vec::new();
|
||||
impl fmt::Display for TokenizerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if self.line != self.line_end || self.column != self.column_end {
|
||||
return write!(f, "Error from {}:{} to {}:{}, '{}'", self.line, self.column, self.line_end, self.column_end, self.message);
|
||||
}
|
||||
write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
|
||||
}
|
||||
}
|
||||
|
||||
/// Character-by-character scanner that turns source text into a list of tokens.
///
/// The boolean fields record which kind of lexeme the scanner is currently inside;
/// `storage` accumulates the characters of that lexeme until it is complete.
#[derive(Debug)]
pub struct Tokenizer {
    /// Source text to tokenize.
    code: String,
    /// Line of the character about to be read (initialised to 1 by `new`).
    line: u64,
    /// Column of the character about to be read (initialised to 1 by `new`).
    column: u64,

    /// True while scanning between an opening and a closing `"`.
    reading_string: bool,
    /// True when the previous character inside a string was a backslash.
    escape_next_char: bool,

    /// True while scanning a run of alphabetic characters that forms an identifier.
    reading_identifier: bool,
    /// True from a `;` until the end of that line.
    skipping_comment: bool,

    /// Characters collected so far for the identifier or string being read.
    storage: Vec<char>,
}
|
||||
|
||||
impl Tokenizer {
|
||||
pub fn new(code: String) -> Tokenizer {
|
||||
Self {
|
||||
code,
|
||||
line: 1,
|
||||
column: 1,
|
||||
|
||||
reading_string: false,
|
||||
escape_next_char: false,
|
||||
|
||||
reading_identifier: false,
|
||||
skipping_comment: false,
|
||||
|
||||
storage: Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
for char in self.code.chars() {
|
||||
let line = self.line;
|
||||
let column = self.column;
|
||||
|
||||
self.line += 1;
|
||||
if char == '\n' {
|
||||
self.column += 1;
|
||||
self.line = 1;
|
||||
}
|
||||
|
||||
if self.skipping_comment {
|
||||
match char {
|
||||
'\n' => {
|
||||
self.skipping_comment = false;
|
||||
},
|
||||
_ => continue
|
||||
}
|
||||
}
|
||||
|
||||
if self.reading_identifier {
|
||||
if !char.is_alphabetic() {
|
||||
self.reading_identifier = false;
|
||||
|
||||
tokens.push(Token::Identifier(self.storage.iter().collect()));
|
||||
self.storage.clear();
|
||||
}
|
||||
|
||||
self.storage.push(char);
|
||||
continue;
|
||||
}
|
||||
|
||||
if self.reading_string {
|
||||
if char == '"' && !self.escape_next_char {
|
||||
self.reading_string = false;
|
||||
|
||||
tokens.push(Token::String(self.storage.iter().collect()));
|
||||
self.storage.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
if char == '\n' {
|
||||
return Err(TokenizerError {
|
||||
line: line - self.storage.len() as u64 - 1,
|
||||
column,
|
||||
|
||||
line_end: line,
|
||||
column_end: column,
|
||||
|
||||
message: String::from("Expected \", got \\n"),
|
||||
})
|
||||
}
|
||||
|
||||
if char == '\\' {
|
||||
self.escape_next_char = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
self.storage.push(char);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
match char {
|
||||
';' => self.skipping_comment = true,
|
||||
'(' => tokens.push(Token::OpenParen),
|
||||
')' => tokens.push(Token::CloseParen),
|
||||
'"' => {
|
||||
self.reading_string = true;
|
||||
self.storage.clear();
|
||||
},
|
||||
c => {
|
||||
if c.is_alphabetic() {
|
||||
self.reading_identifier = true;
|
||||
self.storage.clear();
|
||||
self.storage.push(c);
|
||||
continue;
|
||||
}
|
||||
|
||||
print!("{}", char)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
12
src/main.rs
12
src/main.rs
|
@ -1,4 +1,14 @@
|
|||
use lisp_stuff::*;
|
||||
|
||||
fn main() {
|
||||
let source = std::fs::read_to_string("src/test.lisp").unwrap();
|
||||
println!("{}", source);
|
||||
let mut tokenizer = Tokenizer::new(source);
|
||||
let tokens = match tokenizer.tokenize() {
|
||||
Ok(tokens) => tokens,
|
||||
Err(e) => {
|
||||
println!("{}", e);
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
println!("{:?}", tokens);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
; This is a comment
|
||||
(print "Hello, World")
|
||||
(print (add 1 2))
|
||||
|
||||
; OpenParen, Identifier, String, CloseParen
|
||||
; OpenParen, Identifier, OpenParen, Identifier, Int, Int, CloseParen, CloseParen
|
||||
(print "Hello, World") ; OpenParen, Identifier, String, CloseParen
|
||||
; (print (add 1 2)) ; OpenParen, Identifier, OpenParen, Identifier, Int, Int, CloseParen, CloseParen
|
Reference in a new issue