// holy_lisp_archive/src/tokenizer.rs

use std::fmt;
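
/// A single lexical token produced by [`Tokenizer::tokenize`].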
#[derive(Debug, Clone)]
pub enum Token {
    OpenParen,
    CloseParen,
    Identifier(String),
    String(String),
    Integer(i32),
}
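
/// A tokenizer error describing a 1-based source span and what went wrong.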
#[derive(Debug)]
pub struct TokenizerError {
    line: u64,
    column: u64,
    line_end: u64,
    column_end: u64,
    message: String,
}

impl fmt::Display for TokenizerError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.line != self.line_end || self.column != self.column_end {
            return write!(
                f,
                "Error from {}:{} to {}:{}, '{}'",
                self.line, self.column, self.line_end, self.column_end, self.message
            );
        }
        write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
    }
}
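
/// A single-pass, character-by-character tokenizer. The `reading_*` and
/// `skipping_comment` flags track which kind of token is currently being
/// accumulated into `storage`.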
pub struct Tokenizer {
    code: String,
    line: u64,
    column: u64,
    reading_string: bool,
    escape_next_char: bool,
    reading_num: bool,
    reading_identifier: bool,
    skipping_comment: bool,
    storage: Vec<char>,
}

impl Tokenizer {
    pub fn new(code: String) -> Tokenizer {
        Self {
            code,
            line: 1,
            column: 1,
            reading_num: false,
            reading_string: false,
            escape_next_char: false,
            reading_identifier: false,
            skipping_comment: false,
            storage: Vec::new(),
        }
    }
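
    /// Walks the source one character at a time and returns the token stream,
    /// or a [`TokenizerError`] pointing at the offending span.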
    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
        let mut tokens = Vec::new();
        // Iterate over a clone of the source so the loop body is free to
        // borrow `self` mutably for the cursor and state flags.
        let code = self.code.clone();
        for char in code.chars() {
            let line = self.line;
            let column = self.column;
            // Advance the cursor: the column grows with every character and
            // a newline moves to column 1 of the next line.
            self.column += 1;
            if char == '\n' {
                self.line += 1;
                self.column = 1;
            }
            if self.skipping_comment {
                match char {
                    '\n' => self.skipping_comment = false,
                    _ => continue,
                }
            }
            if self.reading_identifier {
                if char.is_alphabetic() {
                    self.storage.push(char);
                    continue;
                }
                // The identifier ended just before this character; emit it
                // and fall through so the character is tokenized normally.
                self.reading_identifier = false;
                tokens.push(Token::Identifier(self.storage.iter().collect()));
                self.storage.clear();
            }
            if self.reading_num {
                // Allow spacing numbers like 1_000_000; underscores are
                // accepted but never stored, so `parse` only sees digits.
                if char.is_numeric() || char == '_' {
                    if char != '_' {
                        self.storage.push(char);
                    }
                    continue;
                }
                self.reading_num = false;
                tokens.push(Token::Integer(
                    self.storage.iter().collect::<String>().parse().unwrap(),
                ));
                self.storage.clear();
            }
            if self.reading_string {
                if self.escape_next_char {
                    // Minimal escaping: keep the escaped character verbatim,
                    // so \" yields " and \\ yields \.
                    self.escape_next_char = false;
                    self.storage.push(char);
                    continue;
                }
                if char == '"' {
                    self.reading_string = false;
                    tokens.push(Token::String(self.storage.iter().collect()));
                    self.storage.clear();
                    continue;
                }
                if char == '\n' {
                    return Err(TokenizerError {
                        line,
                        // Point back to roughly where the string opened
                        // (escape sequences are not counted).
                        column: column.saturating_sub(self.storage.len() as u64 + 1),
                        line_end: line,
                        column_end: column,
                        message: String::from("Expected \", got \\n"),
                    });
                }
                if char == '\\' {
                    self.escape_next_char = true;
                    continue;
                }
                self.storage.push(char);
                continue;
            }
            match char {
                ';' => self.skipping_comment = true,
                '(' => tokens.push(Token::OpenParen),
                ')' => tokens.push(Token::CloseParen),
                '"' => {
                    self.reading_string = true;
                    self.storage.clear();
                }
                c => {
                    if c.is_alphabetic() {
                        self.reading_identifier = true;
                        self.storage.clear();
                        self.storage.push(c);
                    } else if c.is_numeric() || c == '_' {
                        // Allow numbers to also start with _ for fun:
                        // ______100_0__ is 1,000.
                        // TODO: delete this once identifiers allow more symbols.
                        self.reading_num = true;
                        self.storage.clear();
                        if c != '_' {
                            self.storage.push(c);
                        }
                    }
                }
            }
        }
        // Flush whichever token was still being accumulated when the input
        // ended, so trailing identifiers and numbers are not dropped.
        if self.reading_identifier {
            self.reading_identifier = false;
            tokens.push(Token::Identifier(self.storage.iter().collect()));
            self.storage.clear();
        } else if self.reading_num {
            self.reading_num = false;
            tokens.push(Token::Integer(
                self.storage.iter().collect::<String>().parse().unwrap(),
            ));
            self.storage.clear();
        }
        Ok(tokens)
    }
}
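
// An illustrative test, added here as a usage sketch (not part of the archived
// source): it tokenizes a small form and checks the stream via its Debug
// rendering, since `Token` does not derive `PartialEq`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenizes_a_simple_form() {
        let mut tokenizer = Tokenizer::new(String::from("(add 1_0 \"hi\") ; note"));
        let tokens = tokenizer.tokenize().expect("tokenizing should succeed");
        assert_eq!(
            format!("{:?}", tokens),
            r#"[OpenParen, Identifier("add"), Integer(10), String("hi"), CloseParen]"#
        );
    }
}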