use std::fmt;

#[derive(Debug, Clone)]
pub enum Token {
    OpenParen,
    CloseParen,
    Quote,
    Unquote,
    Identifier(String),
    String(String),
    Integer(i32),
    Float(f32),
}

/// Error produced by the tokenizer, pointing at a 1-based line/column span.
#[derive(Debug)]
pub struct TokenizerError {
    line: u64,
    column: u64,

    line_end: u64,
    column_end: u64,

    message: String,
}

impl fmt::Display for TokenizerError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.line != self.line_end || self.column != self.column_end {
            return write!(
                f,
                "Error from {}:{} to {}:{}, '{}'",
                self.line, self.column, self.line_end, self.column_end, self.message
            );
        }
        write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
    }
}

pub struct Tokenizer {
    code: String,
    line: u64,
    column: u64,

    reading_string: bool,
    escape_next_char: bool,

    reading_num: bool,
    is_float: bool,

    reading_identifier: bool,
    skipping_comment: bool,

    storage: Vec<char>,
}

impl Tokenizer {
    pub fn new(code: String) -> Tokenizer {
        Self {
            code,
            line: 1,
            column: 1,

            reading_num: false,
            is_float: false,

            reading_string: false,
            escape_next_char: false,

            reading_identifier: false,
            skipping_comment: false,

            storage: Vec::new(),
        }
    }

    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
        let mut tokens = Vec::new();

        for char in self.code.chars() {
            // Position of the current character, captured before advancing.
            let line = self.line;
            let column = self.column;

            // Advance the cursor: the column grows with every character and
            // resets when a newline bumps the line counter.
            self.column += 1;
            if char == '\n' {
                self.line += 1;
                self.column = 1;
            }

            // Comments run from ';' to the end of the line.
            if self.skipping_comment {
                match char {
                    '\n' => {
                        self.skipping_comment = false;
                    },
                    _ => continue
                }
            }

            if self.reading_identifier {
                if !char.is_alphabetic() {
                    // The identifier ends here; emit it and let the current
                    // character fall through to the match below.
                    self.reading_identifier = false;

                    tokens.push(Token::Identifier(self.storage.iter().collect()));
                    self.storage.clear();
                } else {
                    self.storage.push(char);
                    continue;
                }
            }

            if self.reading_num {
                // Allow spacing numbers like 1_000_000
                if !char.is_numeric() && char != '_' && char != '.' {
                    // The number ends here; emit it and let the current
                    // character fall through to the match below.
                    self.reading_num = false;

                    if self.is_float {
                        // parse() is unwrap()ed: a malformed literal such as 1.2.3 would panic.
                        tokens.push(Token::Float(self.storage.iter().collect::<String>().parse().unwrap()));
                    } else {
                        tokens.push(Token::Integer(self.storage.iter().collect::<String>().parse().unwrap()));
                    }

                    self.is_float = false;

                    self.storage.clear();
                } else {
                    if char == '.' {
                        self.is_float = true;
                    }
                    if char != '_' {
                        self.storage.push(char);
                    }
                    continue;
                }
            }

            if self.reading_string {
                if char == '\n' {
                    // Unterminated string: a string must close on the line it
                    // starts on, so report a span from the opening quote to here.
                    return Err(TokenizerError {
                        line,
                        column: column - self.storage.len() as u64 - 1,

                        line_end: line,
                        column_end: column,

                        message: String::from("Expected \", got \\n"),
                    });
                }

                if self.escape_next_char {
                    // The previous character was a backslash: keep this one verbatim.
                    self.escape_next_char = false;
                    self.storage.push(char);
                    continue;
                }

                if char == '"' {
                    self.reading_string = false;

                    tokens.push(Token::String(self.storage.iter().collect()));
                    self.storage.clear();
                    continue;
                }

                if char == '\\' {
                    self.escape_next_char = true;
                    continue;
                }

                self.storage.push(char);

                continue;
            }

            match char {
                ';' => self.skipping_comment = true,
                '(' => tokens.push(Token::OpenParen),
                ')' => tokens.push(Token::CloseParen),
                '"' => {
                    self.reading_string = true;
                    self.storage.clear();
                },
                '\'' => tokens.push(Token::Quote),
                ',' => tokens.push(Token::Unquote),
                c => {
                    if c.is_alphabetic() {
                        self.reading_identifier = true;
                        self.storage.clear();
                        self.storage.push(c);
                        continue;
                    } else if c.is_numeric() || c == '.' {
                        self.reading_num = true;
                        self.storage.clear();
                        if c.is_numeric() {
                            self.storage.push(c);
                        } else if c == '.' {
                            // A leading '.' starts a float, e.g. ".5" is read as "0.5".
                            self.is_float = true;
                            self.storage.push('0');
                            self.storage.push(c);
                        }
                        continue;
                    }
                }
            }
        }

        // Emit any token that was still being read when the input ended.
        if self.reading_identifier {
            tokens.push(Token::Identifier(self.storage.iter().collect()));
        } else if self.reading_num {
            if self.is_float {
                tokens.push(Token::Float(self.storage.iter().collect::<String>().parse().unwrap()));
            } else {
                tokens.push(Token::Integer(self.storage.iter().collect::<String>().parse().unwrap()));
            }
        } else if self.reading_string {
            return Err(TokenizerError {
                line: self.line,
                column: self.column,

                line_end: self.line,
                column_end: self.column,

                message: String::from("Expected \", got end of input"),
            });
        }

        Ok(tokens)
    }
}
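
// A minimal usage sketch, not part of the original file: it assumes the
// tokenizer is driven roughly like this and only checks the token kinds
// expected for a small Lisp-like snippet and for an unterminated string.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenizes_a_small_expression() {
        let mut tokenizer = Tokenizer::new(String::from("(add 1 2.5 \"hi\") ; comment"));
        let tokens = tokenizer.tokenize().unwrap();

        assert_eq!(tokens.len(), 6);
        assert!(matches!(tokens[0], Token::OpenParen));
        assert!(matches!(tokens[1], Token::Identifier(ref s) if s == "add"));
        assert!(matches!(tokens[2], Token::Integer(1)));
        assert!(matches!(tokens[3], Token::Float(f) if (f - 2.5).abs() < f32::EPSILON));
        assert!(matches!(tokens[4], Token::String(ref s) if s == "hi"));
        assert!(matches!(tokens[5], Token::CloseParen));
    }

    #[test]
    fn reports_unterminated_strings() {
        let mut tokenizer = Tokenizer::new(String::from("(\"oops\n)"));
        let err = tokenizer.tokenize().unwrap_err();
        // Display prints a line:column span followed by the message.
        assert!(err.to_string().contains("Expected"));
    }
}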