// NOTE(review): the lines below are page chrome scraped from the archived
// repository listing; kept as a comment so the file remains valid Rust.
// Repository archived on 2024-07-15 — read-only: files can be viewed and
// cloned, but no pushes, issues, or pull requests.
// holy_lisp_archive/src/tokenizer.rs — 285 lines, no EOL, 5.2 KiB, Rust.
use std::fmt;
#[cfg(test)]
mod tests {
    use crate::{Token, Tokenizer, TokenizerError};

    /// Tokenizes `source` in one shot, returning the full token stream.
    fn quick_tokenize(source: &str) -> Result<Vec<Token>, TokenizerError> {
        Tokenizer::new(String::from(source)).tokenize()
    }

    // Small constructors so expected token streams read like the input.
    fn open() -> Token { Token::OpenParen }
    fn close() -> Token { Token::CloseParen }
    fn iden(s: &str) -> Token { Token::Identifier(s.to_string()) }
    fn string(s: &str) -> Token { Token::String(s.to_string()) }
    fn int(n: i32) -> Token { Token::Integer(n) }
    fn float(n: f32) -> Token { Token::Float(n) }

    /// A call with a single string argument tokenizes to four tokens.
    #[test]
    fn test_hello_world() -> Result<(), TokenizerError> {
        let expected = vec![open(), iden("print"), string("Hello, World!"), close()];
        let actual = quick_tokenize("
(print \"Hello, World!\")
")?;
        assert_eq!(expected, actual);
        Ok(())
    }

    /// Nested arithmetic with integer and float literals.
    #[test]
    fn test_math() -> Result<(), TokenizerError> {
        let expected = vec![
            open(), iden("print"),
            open(), iden("add"),
            open(), iden("div"), float(4.5),
            open(), iden("sub"), float(0.5), float(0.2), close(),
            close(),
            open(), iden("mul"), int(21), float(0.05), close(),
            close(),
            close(),
        ];
        let actual = quick_tokenize("
(print
(add
(div
4.5
(sub 0.5 0.2)
)
(mul 21 0.05)
)
)
")?;
        assert_eq!(expected, actual);
        Ok(())
    }
}
/// A single lexical token produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `(` — opens a list.
    OpenParen,
    /// `)` — closes a list.
    CloseParen,
    /// `'` — quote marker.
    Quote,
    /// `,` — unquote marker.
    Unquote,
    /// A bare symbol such as `print` or `add`.
    Identifier(String),
    /// A double-quoted string literal (delimiters stripped).
    String(String),
    /// An integer literal, e.g. `21`.
    Integer(i32),
    /// A floating-point literal, e.g. `0.05` or `.5`.
    Float(f32)
}
/// A positional tokenization error.
///
/// The error covers the span from (`line`, `column`) to
/// (`line_end`, `column_end`); a single-position error has both
/// endpoints equal. Rendered for users by the `Display` impl.
//
// Added Clone/PartialEq/Eq: public error types should be comparable and
// clonable by callers (e.g. in tests); all fields support it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenizerError {
    // 1-based start position of the offending span.
    line: u64,
    column: u64,
    // End of the span; equals the start for point errors.
    line_end: u64,
    column_end: u64,
    // Human-readable description of what went wrong.
    message: String,
}
impl fmt::Display for TokenizerError {
    /// Renders `Error at L:C, 'msg'` for point errors, or
    /// `Error from L:C to L:C, 'msg'` when the span covers a range.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let spans_range = self.line != self.line_end || self.column != self.column_end;
        if spans_range {
            write!(
                f,
                "Error from {}:{} to {}:{}, '{}'",
                self.line, self.column, self.line_end, self.column_end, self.message
            )
        } else {
            write!(f, "Error at {}:{}, '{}'", self.line, self.column, self.message)
        }
    }
}
// Marker impl: Debug + Display above are sufficient, so TokenizerError can be
// used with `?`, `Box<dyn Error>`, etc. with no extra methods.
impl std::error::Error for TokenizerError {}
/// Single-pass, character-at-a-time tokenizer for the Lisp dialect.
///
/// State machine: at most one of the `reading_*` / `skipping_comment`
/// flags is active at a time, and `storage` accumulates the characters
/// of the token currently being read.
pub struct Tokenizer {
    // Full source text to tokenize.
    code: String,
    // Current position, intended to be 1-based; used for error spans.
    line: u64,
    column: u64,
    // True while inside a double-quoted string literal.
    reading_string: bool,
    // True when the previous character was a backslash inside a string.
    escape_next_char: bool,
    // True while accumulating a numeric literal.
    reading_num: bool,
    // True if the numeric literal being read contains a '.'.
    is_float: bool,
    // True while accumulating an identifier.
    reading_identifier: bool,
    // True between a ';' and the next newline.
    skipping_comment: bool,
    // Characters of the in-progress token.
    storage: Vec<char>,
}
impl Tokenizer {
    /// Creates a tokenizer over `code`, positioned at line 1, column 1.
    pub fn new(code: String) -> Tokenizer {
        Self {
            code,
            line: 1,
            column: 1,
            reading_num: false,
            is_float: false,
            reading_string: false,
            escape_next_char: false,
            reading_identifier: false,
            skipping_comment: false,
            storage: Vec::new()
        }
    }

    /// Builds an error covering a single source position.
    fn error_at(line: u64, column: u64, message: String) -> TokenizerError {
        TokenizerError {
            line,
            column,
            line_end: line,
            column_end: column,
            message,
        }
    }

    /// Parses an accumulated digit buffer into an `Integer` or `Float` token.
    ///
    /// BUG FIX: previously `parse().unwrap()` — a malformed literal such as
    /// `1.2.3` panicked instead of returning a `TokenizerError`.
    fn parse_number(text: &str, is_float: bool, line: u64, column: u64) -> Result<Token, TokenizerError> {
        if is_float {
            text.parse::<f32>()
                .map(Token::Float)
                .map_err(|_| Self::error_at(line, column, format!("Invalid float literal '{}'", text)))
        } else {
            text.parse::<i32>()
                .map(Token::Integer)
                .map_err(|_| Self::error_at(line, column, format!("Invalid integer literal '{}'", text)))
        }
    }

    /// Tokenizes the whole source, returning the token stream or the first
    /// positional error (unterminated string, malformed number).
    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
        let mut tokens = Vec::new();
        for ch in self.code.chars() {
            // Position of the character we are about to process.
            // BUG FIX: the updates were swapped — the column was bumped on
            // newline and the line on every character.
            let line = self.line;
            let column = self.column;
            self.column += 1;
            if ch == '\n' {
                self.line += 1;
                self.column = 1;
            }
            // ';' comments run to the end of the line.
            if self.skipping_comment {
                if ch == '\n' {
                    self.skipping_comment = false;
                }
                continue;
            }
            if self.reading_string {
                if self.escape_next_char {
                    // BUG FIX: the flag was never cleared, so after one
                    // backslash every later '"' was treated as escaped and
                    // the string never terminated.
                    self.escape_next_char = false;
                    self.storage.push(ch);
                    continue;
                }
                match ch {
                    '"' => {
                        self.reading_string = false;
                        tokens.push(Token::String(self.storage.iter().collect()));
                        self.storage.clear();
                    }
                    // Strings may not span lines.
                    '\n' => {
                        return Err(Self::error_at(line, column, String::from("Expected \", got \\n")));
                    }
                    '\\' => self.escape_next_char = true,
                    _ => self.storage.push(ch),
                }
                continue;
            }
            if self.reading_identifier {
                if ch.is_alphabetic() {
                    self.storage.push(ch);
                    continue;
                }
                self.reading_identifier = false;
                tokens.push(Token::Identifier(self.storage.iter().collect()));
                self.storage.clear();
                // BUG FIX: fall through instead of `continue` — the
                // terminating character (e.g. the ')' in `(add)`) was
                // silently dropped before.
            }
            if self.reading_num {
                if ch.is_numeric() || ch == '_' || ch == '.' {
                    if ch == '.' {
                        self.is_float = true;
                    }
                    // '_' is a digit spacer (1_000_000) and is not stored.
                    if ch != '_' {
                        self.storage.push(ch);
                    }
                    continue;
                }
                self.reading_num = false;
                let text: String = self.storage.iter().collect();
                self.storage.clear();
                tokens.push(Self::parse_number(&text, self.is_float, line, column)?);
                self.is_float = false;
                // Fall through: the terminating character still needs handling.
            }
            match ch {
                ';' => self.skipping_comment = true,
                '(' => tokens.push(Token::OpenParen),
                ')' => tokens.push(Token::CloseParen),
                '"' => {
                    self.reading_string = true;
                    self.storage.clear();
                },
                '\'' => tokens.push(Token::Quote),
                ',' => tokens.push(Token::Unquote),
                c if c.is_alphabetic() => {
                    self.reading_identifier = true;
                    self.storage.clear();
                    self.storage.push(c);
                },
                c if c.is_numeric() => {
                    self.reading_num = true;
                    self.storage.clear();
                    self.storage.push(c);
                },
                // A bare leading '.' starts a float; store it as "0." so
                // ".5" parses as 0.5.
                '.' => {
                    self.reading_num = true;
                    self.is_float = true;
                    self.storage.clear();
                    self.storage.push('0');
                    self.storage.push('.');
                },
                // Whitespace and any other character: token separator.
                _ => {}
            }
        }
        // BUG FIX: flush state at end of input — a trailing identifier or
        // number (e.g. the source `foo`) used to be silently dropped.
        if self.reading_string {
            return Err(Self::error_at(self.line, self.column, String::from("Expected \", got end of input")));
        }
        if self.reading_identifier {
            self.reading_identifier = false;
            tokens.push(Token::Identifier(self.storage.iter().collect()));
            self.storage.clear();
        }
        if self.reading_num {
            self.reading_num = false;
            let text: String = self.storage.iter().collect();
            self.storage.clear();
            tokens.push(Self::parse_number(&text, self.is_float, self.line, self.column)?);
            self.is_float = false;
        }
        Ok(tokens)
    }
}