use std::fmt;

#[cfg(test)]
mod tests {
    use crate::{Token, Tokenizer, TokenizerError};

    /// Tokenizes `source` with a fresh [`Tokenizer`].
    fn quick_tokenize(source: &str) -> Result<Vec<Token>, TokenizerError> {
        let mut tokenizer = Tokenizer::new(String::from(source));
        tokenizer.tokenize()
    }

    fn open() -> Token {
        Token::OpenParen
    }

    fn close() -> Token {
        Token::CloseParen
    }

    fn iden(s: &str) -> Token {
        Token::Identifier(String::from(s))
    }

    fn string(s: &str) -> Token {
        Token::String(String::from(s))
    }

    fn int(n: i32) -> Token {
        Token::Integer(n)
    }

    fn float(n: f32) -> Token {
        Token::Float(n)
    }

    #[test]
    fn test_hello_world() -> Result<(), TokenizerError> {
        assert_eq!(
            vec![open(), iden("print"), string("Hello, World!"), close()],
            quick_tokenize(" (print \"Hello, World!\") ")?
        );
        Ok(())
    }

    #[test]
    fn test_math() -> Result<(), TokenizerError> {
        assert_eq!(
            vec![
                open(),
                iden("print"),
                open(),
                iden("add"),
                open(),
                iden("div"),
                float(4.5),
                open(),
                iden("sub"),
                float(0.5),
                float(0.2),
                close(),
                close(),
                open(),
                iden("mul"),
                int(21),
                float(0.05),
                close(),
                close(),
                close()
            ],
            quick_tokenize(" (print (add (div 4.5 (sub 0.5 0.2) ) (mul 21 0.05) ) ) ")?
        );
        Ok(())
    }

    #[test]
    fn test_identifier_followed_by_paren() -> Result<(), TokenizerError> {
        assert_eq!(vec![open(), iden("foo"), close()], quick_tokenize("(foo)")?);
        Ok(())
    }

    #[test]
    fn test_trailing_token_is_flushed() -> Result<(), TokenizerError> {
        assert_eq!(vec![iden("abc")], quick_tokenize("abc")?);
        assert_eq!(vec![int(1000)], quick_tokenize("1_000")?);
        Ok(())
    }

    #[test]
    fn test_escaped_quote() -> Result<(), TokenizerError> {
        assert_eq!(
            vec![string("a\"b"), iden("c")],
            quick_tokenize("\"a\\\"b\" c")?
        );
        Ok(())
    }

    #[test]
    fn test_error_position() {
        let err = quick_tokenize("(a)\n\"x\n").unwrap_err();
        assert_eq!(
            "Error from 2:1 to 2:3, 'Expected \", got \\n'",
            err.to_string()
        );
    }
}

/// A single lexical token produced by [`Tokenizer::tokenize`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `'`
    Quote,
    /// `,`
    Unquote,
    /// A run of alphabetic characters.
    Identifier(String),
    /// The contents of a double-quoted string, without the quotes.
    String(String),
    /// A number literal without a `.`.
    Integer(i32),
    /// A number literal containing a `.`.
    Float(f32),
}

/// An error raised while tokenizing, carrying the 1-based source span it
/// applies to (`line:column` up to `line_end:column_end`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizerError {
    line: u64,
    column: u64,
    line_end: u64,
    column_end: u64,
    message: String,
}

impl fmt::Display for TokenizerError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A span that covers more than one position is printed as a range.
        if self.line != self.line_end || self.column != self.column_end {
            return write!(
                f,
                "Error from {}:{} to {}:{}, '{}'",
                self.line, self.column, self.line_end, self.column_end, self.message
            );
        }
        write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
    }
}

impl std::error::Error for TokenizerError {}

/// A character-by-character tokenizer for a small parenthesised language.
///
/// The scanner state lives in the boolean flags; `storage` accumulates the
/// characters of the identifier, number or string currently being read.
pub struct Tokenizer {
    code: String,
    line: u64,
    column: u64,
    reading_string: bool,
    escape_next_char: bool,
    reading_num: bool,
    is_float: bool,
    reading_identifier: bool,
    skipping_comment: bool,
    storage: Vec<char>,
}

impl Tokenizer {
    /// Creates a tokenizer over `code`, positioned at line 1, column 1.
    pub fn new(code: String) -> Tokenizer {
        Self {
            code,
            line: 1,
            column: 1,
            reading_num: false,
            is_float: false,
            reading_string: false,
            escape_next_char: false,
            reading_identifier: false,
            skipping_comment: false,
            storage: Vec::new(),
        }
    }

    /// Splits the source code into a flat list of [`Token`]s.
    ///
    /// Recognised syntax:
    /// - `(`, `)`, `'` and `,` produce their own tokens;
    /// - `;` starts a comment that runs to the end of the line;
    /// - a run of alphabetic characters is an identifier;
    /// - a run of numeric characters, `_` and `.` is a number (`_` is a
    ///   visual separator and is dropped; a `.` makes it a float; a leading
    ///   `.` is read as `0.`);
    /// - `"..."` is a string; a backslash makes the next character literal
    ///   (so `\"` embeds a quote and `\\` embeds a backslash).
    ///
    /// Any other character is ignored. The tokenizer state is reset on entry,
    /// so the method may be called more than once.
    ///
    /// # Errors
    ///
    /// Returns a [`TokenizerError`] when a string contains a raw newline or is
    /// still open at the end of the input, or when a number literal cannot be
    /// parsed as `i32` / `f32` (for example `1.2.3` or an out-of-range
    /// integer).
    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
        // Start from a clean slate so a second call does not inherit the
        // position or a half-read token from the first one.
        self.line = 1;
        self.column = 1;
        self.reading_string = false;
        self.escape_next_char = false;
        self.reading_num = false;
        self.is_float = false;
        self.reading_identifier = false;
        self.skipping_comment = false;
        self.storage.clear();

        let mut tokens = Vec::new();
        // Position of the first character of the number/string being read.
        let mut start_line = self.line;
        let mut start_column = self.column;

        for ch in self.code.chars() {
            // Position of `ch` itself; `self.line`/`self.column` are then
            // advanced to the position of the next character.
            let line = self.line;
            let column = self.column;
            if ch == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }

            if self.skipping_comment {
                if ch != '\n' {
                    continue;
                }
                self.skipping_comment = false;
            }

            if self.reading_identifier {
                if ch.is_alphabetic() {
                    self.storage.push(ch);
                    continue;
                }
                self.reading_identifier = false;
                tokens.push(Token::Identifier(self.storage.iter().collect()));
                self.storage.clear();
                // No `continue`: the terminating character (e.g. `)`) still
                // has to be tokenized below.
            }

            if self.reading_num {
                // Allow spacing numbers like 1_000_000
                if ch.is_numeric() || ch == '_' || ch == '.' {
                    if ch == '.' {
                        self.is_float = true;
                    }
                    if ch != '_' {
                        self.storage.push(ch);
                    }
                    continue;
                }
                let token = Self::number_token(
                    &self.storage,
                    self.is_float,
                    start_line,
                    start_column,
                    line,
                    column,
                )?;
                tokens.push(token);
                self.reading_num = false;
                self.is_float = false;
                self.storage.clear();
                // Fall through so the terminating character is tokenized too.
            }

            if self.reading_string {
                if ch == '\n' {
                    return Err(TokenizerError {
                        line: start_line,
                        column: start_column,
                        line_end: line,
                        column_end: column,
                        message: String::from("Expected \", got \\n"),
                    });
                }
                if self.escape_next_char {
                    // The escape applies to exactly one character.
                    self.escape_next_char = false;
                    self.storage.push(ch);
                    continue;
                }
                match ch {
                    '"' => {
                        self.reading_string = false;
                        tokens.push(Token::String(self.storage.iter().collect()));
                        self.storage.clear();
                    }
                    '\\' => self.escape_next_char = true,
                    other => self.storage.push(other),
                }
                continue;
            }

            match ch {
                ';' => self.skipping_comment = true,
                '(' => tokens.push(Token::OpenParen),
                ')' => tokens.push(Token::CloseParen),
                '"' => {
                    self.reading_string = true;
                    self.storage.clear();
                    start_line = line;
                    start_column = column;
                }
                '\'' => tokens.push(Token::Quote),
                ',' => tokens.push(Token::Unquote),
                c if c.is_alphabetic() => {
                    self.reading_identifier = true;
                    self.storage.clear();
                    self.storage.push(c);
                }
                c if c.is_numeric() || c == '.' => {
                    self.reading_num = true;
                    self.storage.clear();
                    start_line = line;
                    start_column = column;
                    if c == '.' {
                        // `.5` is read as `0.5`.
                        self.is_float = true;
                        self.storage.push('0');
                    }
                    self.storage.push(c);
                }
                // Whitespace and any unrecognised character are skipped.
                _ => {}
            }
        }

        // The input may end in the middle of a token; emit or reject it.
        if self.reading_identifier {
            self.reading_identifier = false;
            tokens.push(Token::Identifier(self.storage.iter().collect()));
            self.storage.clear();
        }
        if self.reading_num {
            let token = Self::number_token(
                &self.storage,
                self.is_float,
                start_line,
                start_column,
                self.line,
                self.column,
            )?;
            tokens.push(token);
            self.reading_num = false;
            self.is_float = false;
            self.storage.clear();
        }
        if self.reading_string {
            return Err(TokenizerError {
                line: start_line,
                column: start_column,
                line_end: self.line,
                column_end: self.column,
                message: String::from("Expected \", got end of input"),
            });
        }

        Ok(tokens)
    }

    /// Parses the collected number characters into an `Integer` or `Float`
    /// token, reporting a [`TokenizerError`] over the given span on failure.
    fn number_token(
        digits: &[char],
        is_float: bool,
        line: u64,
        column: u64,
        line_end: u64,
        column_end: u64,
    ) -> Result<Token, TokenizerError> {
        let text: String = digits.iter().collect();
        let parsed = if is_float {
            text.parse::<f32>().ok().map(Token::Float)
        } else {
            text.parse::<i32>().ok().map(Token::Integer)
        };
        parsed.ok_or_else(|| TokenizerError {
            line,
            column,
            line_end,
            column_end,
            message: format!("Invalid number literal: {}", text),
        })
    }
}