285 lines
No EOL
5.2 KiB
Rust
285 lines
No EOL
5.2 KiB
Rust
use std::fmt;
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::{Token, Tokenizer, TokenizerError};

    /// Runs a fresh tokenizer over `source` and hands back whatever it produced.
    fn quick_tokenize(source: &str) -> Result<Vec<Token>, TokenizerError> {
        Tokenizer::new(source.to_owned()).tokenize()
    }

    // Shorthand constructors so the expected token lists below stay readable.

    fn open() -> Token {
        Token::OpenParen
    }

    fn close() -> Token {
        Token::CloseParen
    }

    fn iden(s: &str) -> Token {
        Token::Identifier(s.to_owned())
    }

    fn string(s: &str) -> Token {
        Token::String(s.to_owned())
    }

    fn int(n: i32) -> Token {
        Token::Integer(n)
    }

    fn float(n: f32) -> Token {
        Token::Float(n)
    }

    /// A single call with one string-literal argument.
    #[test]
    fn test_hello_world() -> Result<(), TokenizerError> {
        let expected = vec![
            open(),
            iden("print"),
            string("Hello, World!"),
            close(),
        ];

        let actual = quick_tokenize("
            (print \"Hello, World!\")
        ")?;

        assert_eq!(expected, actual);
        Ok(())
    }

    /// Nested calls mixing integer and float literals across several lines.
    #[test]
    fn test_math() -> Result<(), TokenizerError> {
        let expected = vec![
            open(),
            iden("print"),
            open(),
            iden("add"),
            open(),
            iden("div"),
            float(4.5),
            open(),
            iden("sub"),
            float(0.5),
            float(0.2),
            close(),
            close(),
            open(),
            iden("mul"),
            int(21),
            float(0.05),
            close(),
            close(),
            close(),
        ];

        let actual = quick_tokenize("
            (print
                (add
                    (div
                        4.5
                        (sub 0.5 0.2)
                    )
                    (mul 21 0.05)
                )
            )
        ")?;

        assert_eq!(expected, actual);
        Ok(())
    }
}
|
|
|
|
/// A single lexical unit produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `'`
    Quote,
    /// `,`
    Unquote,
    /// A run of alphabetic characters.
    Identifier(String),
    /// The contents of a double-quoted string literal, without the quotes.
    String(String),
    /// A numeric literal without a decimal point.
    Integer(i32),
    /// A numeric literal containing a decimal point.
    Float(f32),
}
|
|
|
|
/// An error raised while tokenizing, carrying the source span it refers to.
///
/// When the start and end positions coincide the `Display` impl reports a
/// single position; otherwise it reports the whole span.
#[derive(Debug)]
pub struct TokenizerError {
    // Start of the offending span.
    line: u64,
    column: u64,

    // End of the offending span.
    line_end: u64,
    column_end: u64,

    // Human-readable description of what went wrong.
    message: String,
}
|
|
|
|
impl fmt::Display for TokenizerError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
if self.line != self.line_end || self.column != self.column_end {
|
|
return write!(f, "Error from {}:{} to {}:{}, '{}'", self.line, self.column, self.line_end, self.column_end, self.message);
|
|
}
|
|
write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
|
|
}
|
|
}
|
|
|
|
// Marker impl: the trait's default methods are used as-is, so the error can be
// boxed or propagated as a standard `std::error::Error`.
impl std::error::Error for TokenizerError {}
|
|
|
|
/// Character-by-character scanner that turns source text into `Token`s.
///
/// The scanner is a small state machine: the boolean flags record which kind
/// of token (if any) is currently being accumulated in `storage`.
pub struct Tokenizer {
    // The source text being scanned.
    code: String,
    // Position counters advanced while scanning; both start at 1.
    line: u64,
    column: u64,

    // String-literal state: inside a `"..."` literal, and whether a backslash
    // was just seen inside it.
    reading_string: bool,
    escape_next_char: bool,

    // Number state: inside a numeric literal, and whether it contains a `.`.
    reading_num: bool,
    is_float: bool,

    // Inside an identifier / inside a `;` comment.
    reading_identifier: bool,
    skipping_comment: bool,

    // Characters collected so far for the token in progress.
    storage: Vec<char>,
}
|
|
|
|
impl Tokenizer {
|
|
pub fn new(code: String) -> Tokenizer {
|
|
Self {
|
|
code,
|
|
line: 1,
|
|
column: 1,
|
|
|
|
reading_num: false,
|
|
is_float: false,
|
|
|
|
reading_string: false,
|
|
escape_next_char: false,
|
|
|
|
reading_identifier: false,
|
|
skipping_comment: false,
|
|
|
|
storage: Vec::new()
|
|
}
|
|
}
|
|
|
|
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
|
|
let mut tokens = Vec::new();
|
|
|
|
for char in self.code.chars() {
|
|
let line = self.line;
|
|
let column = self.column;
|
|
|
|
self.line += 1;
|
|
if char == '\n' {
|
|
self.column += 1;
|
|
self.line = 1;
|
|
}
|
|
|
|
if self.skipping_comment {
|
|
match char {
|
|
'\n' => {
|
|
self.skipping_comment = false;
|
|
},
|
|
_ => continue
|
|
}
|
|
}
|
|
|
|
if self.reading_identifier {
|
|
if !char.is_alphabetic() {
|
|
self.reading_identifier = false;
|
|
|
|
tokens.push(Token::Identifier(self.storage.iter().collect()));
|
|
self.storage.clear();
|
|
continue;
|
|
}
|
|
|
|
self.storage.push(char);
|
|
continue;
|
|
}
|
|
|
|
if self.reading_num {
|
|
// Allow spacing numbers like 1_000_000
|
|
if !char.is_numeric() && char != '_' && char != '.' {
|
|
self.reading_num = false;
|
|
|
|
if self.is_float {
|
|
tokens.push(Token::Float(self.storage.iter().collect::<String>().parse().unwrap()));
|
|
} else {
|
|
tokens.push(Token::Integer(self.storage.iter().collect::<String>().parse().unwrap()));
|
|
}
|
|
|
|
self.is_float = false;
|
|
|
|
self.storage.clear();
|
|
} else {
|
|
if char == '.' {
|
|
self.is_float = true;
|
|
}
|
|
if char != '_' {
|
|
self.storage.push(char);
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if self.reading_string {
|
|
if char == '"' && !self.escape_next_char {
|
|
self.reading_string = false;
|
|
|
|
tokens.push(Token::String(self.storage.iter().collect()));
|
|
self.storage.clear();
|
|
continue;
|
|
}
|
|
|
|
if char == '\n' {
|
|
return Err(TokenizerError {
|
|
line: line - self.storage.len() as u64 - 1,
|
|
column,
|
|
|
|
line_end: line,
|
|
column_end: column,
|
|
|
|
message: String::from("Expected \", got \\n"),
|
|
})
|
|
}
|
|
|
|
if char == '\\' {
|
|
self.escape_next_char = true;
|
|
continue;
|
|
}
|
|
|
|
self.storage.push(char);
|
|
|
|
continue;
|
|
}
|
|
|
|
match char {
|
|
';' => self.skipping_comment = true,
|
|
'(' => tokens.push(Token::OpenParen),
|
|
')' => tokens.push(Token::CloseParen),
|
|
'"' => {
|
|
self.reading_string = true;
|
|
self.storage.clear();
|
|
},
|
|
'\'' => tokens.push(Token::Quote),
|
|
',' => tokens.push(Token::Unquote),
|
|
c => {
|
|
if c.is_alphabetic() {
|
|
self.reading_identifier = true;
|
|
self.storage.clear();
|
|
self.storage.push(c);
|
|
continue;
|
|
} else if c.is_numeric() || c == '.' {
|
|
self.reading_num = true;
|
|
self.storage.clear();
|
|
if c.is_numeric() {
|
|
self.storage.push(c);
|
|
} else if c == '.' {
|
|
self.is_float = true;
|
|
self.storage.push('0');
|
|
self.storage.push(c);
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(tokens)
|
|
}
|
|
} |