// A lexer (lexical analyzer) implemented in JavaScript as a finite-state automaton.
/**
 * Token produced for a numeric literal.
 */
class TokenNumberLiteral {
  /**
   * @param {number} n - Value of the literal.
   */
  constructor(n) {
    // The literal's value (a number).
    this.data = n;
  }
}
/**
 * Token produced for an identifier (a variable name).
 */
class TokenIdentifier {
  /**
   * @param {string} s - Text of the identifier.
   */
  constructor(s) {
    // The identifier's name.
    this.data = s;
  }
}
/** Token for the `-` operator. */
class TokenMinus {}

/** Token for the `+` operator. */
class TokenPlus {}

/** Token for the `/` operator. */
class TokenSlash {}

/** Token for the `*` operator. */
class TokenStar {}

/** Token for a whitespace character. */
class TokenWhitespace {}
/**
 * Tests whether `c` is a single ASCII decimal digit ("0" through "9").
 *
 * The input must be a one-character string. Without that guard the relational
 * operators coerce their operands: `null` and numbers compare numerically
 * (so `null` would count as a digit), and a longer string such as "12" sorts
 * between "0" and "9".
 *
 * @param {*} c - Candidate character.
 * @returns {boolean} True only for a one-character string in "0".."9".
 */
let is_digit = (c) => {
  return typeof c === "string" && c.length === 1 && c >= "0" && c <= "9";
};
/**
 * Tests whether `c` is the dot character ".".
 *
 * Uses strict equality so values that merely coerce to "." (for example the
 * array `["."]`) are not accepted.
 *
 * @param {*} c - Candidate character.
 * @returns {boolean} True only when `c` is exactly the string ".".
 */
let is_dot = (c) => {
  return c === ".";
};
/**
 * Tests whether `c` is a single ASCII letter (a-z, A-Z) or an underscore.
 *
 * The input must be a one-character string: a longer string such as "ab"
 * would otherwise pass the lexicographic range test, and loose equality on
 * "_" would accept values that only coerce to it.
 *
 * @param {*} c - Candidate character.
 * @returns {boolean} True only for a one-character string that is a letter or "_".
 */
let is_alpha = (c) => {
  if (typeof c !== "string" || c.length !== 1) {
    return false;
  }
  return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z") || c === "_";
};
/**
 * Tests whether `c` is accepted by `is_digit` or by `is_alpha`.
 *
 * @param {string} c - Candidate character.
 * @returns {boolean} Result of `is_digit(c) || is_alpha(c)`.
 */
let is_alphanumeric = (c) => is_digit(c) || is_alpha(c);
/**
 * Tests whether `c` is one of the whitespace characters this lexer
 * recognizes: space, carriage return, tab, or newline.
 *
 * Uses strict equality so values that merely coerce to one of those strings
 * (for example the array `[" "]`) are not accepted.
 *
 * @param {*} c - Candidate character.
 * @returns {boolean} True only when `c` is exactly " ", "\r", "\t" or "\n".
 */
let is_whitespace = (c) => {
  return c === " " || c === "\r" || c === "\t" || c === "\n";
};
/**
 * A line/column location in the source text. Both coordinates start at 1.
 */
class TokenPosition {
  /**
   * @param {number} [line=1] - Line number; defaults to the first line.
   * @param {number} [column=1] - Column number; defaults to the first column.
   */
  constructor(line = 1, column = 1) {
    this.line = line;
    this.column = column;
  }
}
/**
 * Bundles a token with the source text it was read from and a position.
 */
class TokenWithContext {
  /**
   * @param {*} token - The token object.
   * @param {string} lexeme - Source text associated with the token.
   * @param {*} position - Position object associated with the token.
   */
  constructor(token, lexeme, position) {
    // Property order (token, lexeme, position) matches the parameter order.
    Object.assign(this, { token, lexeme, position });
  }
}
/**
 * Hand-written lexer that walks `source` one character at a time.
 *
 * Two indices into `source` are maintained:
 *  - `scan_status`: index of the next character to be consumed by `advance()`.
 *  - `peek_status`: look-ahead index used by `peek()`; `reset_peek()` rewinds
 *    it to `scan_status`.
 *
 * `current_lexeme` accumulates the characters consumed for the token being
 * scanned, and `current_position` tracks the line/column of the next
 * unconsumed character.
 */
class Scanner {
  /**
   * @param {string} [source] - Text to tokenize. When omitted, a built-in
   *   two-line sample input is used.
   */
  constructor(source = "123.23a_123 + a\n2 + 3") {
    this.source = source;
    this.peek_status = 0;
    this.scan_status = 0;
    this.current_lexeme = "";
    this.current_position = new TokenPosition();
  }

  /**
   * Returns the character at the look-ahead index and moves that index
   * forward, without consuming anything.
   *
   * @returns {string|null} The character, or null at end of input.
   */
  peek() {
    const peek_char = this.source[this.peek_status];
    if (peek_char === undefined) {
      return null;
    }
    this.peek_status++;
    return peek_char;
  }

  /** Rewinds the look-ahead index to the next unconsumed character. */
  reset_peek() {
    this.peek_status = this.scan_status;
  }

  /**
   * Consumes one character: appends it to `current_lexeme` and updates
   * `current_position` (a newline moves to column 1 of the next line).
   *
   * @returns {string|null} The consumed character, or null at end of input.
   */
  advance() {
    const next_char = this.source[this.scan_status];
    if (next_char === undefined) {
      return null;
    }
    this.current_lexeme += next_char;
    if (next_char === "\n") {
      this.current_position.line++;
      this.current_position.column = 1;
    } else {
      this.current_position.column++;
    }
    this.scan_status++;
    // Resync rather than increment: if look-ahead had run past the scan
    // index, incrementing would make the next peek() skip a character.
    this.peek_status = this.scan_status;
    return next_char;
  }

  /**
   * Consumes characters for as long as the next one satisfies `f`.
   *
   * @param {function(string): boolean} f - Predicate on a single character.
   */
  advance_while(f) {
    while (this.peek_check1(f)) {
      this.advance();
    }
  }

  /**
   * Tests the next unconsumed character against `f` without consuming it.
   *
   * @param {function(string): boolean} f - Predicate on a single character.
   * @returns {boolean} Result of `f`, or false at end of input.
   */
  peek_check1(f) {
    this.reset_peek();
    const peek_char = this.peek();
    if (peek_char === null) {
      return false;
    }
    return f(peek_char);
  }

  /**
   * Tests the next two unconsumed characters against `f1` and `f2`
   * respectively, without consuming them.
   *
   * @param {function(string): boolean} f1 - Predicate for the first character.
   * @param {function(string): boolean} f2 - Predicate for the second character.
   * @returns {boolean} True when both characters exist and both predicates hold.
   */
  peek_check2(f1, f2) {
    this.reset_peek();
    const peek_char_1 = this.peek();
    if (peek_char_1 === null) {
      return false;
    }
    const peek_char_2 = this.peek();
    if (peek_char_2 === null) {
      return false;
    }
    return f1(peek_char_1) && f2(peek_char_2);
  }

  /**
   * Finishes scanning a number whose first digit has already been consumed:
   * remaining digits, then optionally a dot followed by at least one digit.
   *
   * @returns {TokenNumberLiteral} Token holding `Number(current_lexeme)`.
   */
  number() {
    this.advance_while(is_digit);
    // Only take the dot when a digit follows it, so "1." leaves the dot unconsumed.
    if (this.peek_check2(is_dot, is_digit)) {
      this.advance();
      this.advance_while(is_digit);
    }
    return new TokenNumberLiteral(Number(this.current_lexeme));
  }

  /**
   * Finishes scanning an identifier whose first character has already been
   * consumed, taking every following alphanumeric character.
   *
   * @returns {TokenIdentifier} Token holding the identifier text.
   */
  identifier() {
    this.advance_while(is_alphanumeric);
    return new TokenIdentifier(this.current_lexeme);
  }

  /**
   * Scans the next token from the source.
   *
   * @returns {TokenWithContext|null} The token with its lexeme and start
   *   position, or null at end of input. For a character that starts no
   *   known token, the result's `token` field is null and its `lexeme` is
   *   that single character.
   */
  scan_next() {
    // Copy the position: advance() mutates `current_position` in place, so
    // storing the reference would make every token report the same location.
    const start_position = Object.assign(new TokenPosition(), this.current_position);
    this.current_lexeme = "";
    const next_char = this.advance();
    if (next_char === null) {
      return null;
    }
    let token = null;
    if (next_char === "+") {
      token = new TokenPlus();
    } else if (next_char === "-") {
      token = new TokenMinus();
    } else if (next_char === "*") {
      token = new TokenStar();
    } else if (next_char === "/") {
      token = new TokenSlash();
    } else if (is_whitespace(next_char)) {
      token = new TokenWhitespace();
    } else if (is_digit(next_char)) {
      token = this.number();
    } else if (is_alpha(next_char)) {
      token = this.identifier();
    }
    return new TokenWithContext(token, this.current_lexeme, start_position);
  }
}
// Driver: scan the default source and print each token until scan_next()
// signals end of input by returning null.
let s = new Scanner();
for (let token = s.scan_next(); token != null; token = s.scan_next()) {
  console.log(token);
}