Files
search-ast-parser-js/src/parser.ts
2023-04-29 16:53:23 +03:00

227 lines
5.9 KiB
TypeScript

import { InputReader } from './reader'
import { ILexer, LexerToken, LexerTokenValue } from './lexer'
/**
 * Base shape shared by every node the parser produces.
 * The `type` field is the discriminant that identifies the concrete node kind.
 */
export interface ParserTokenValue {
type: 'word' | 'operator' | 'phrase' | 'group'
}
/**
 * A quoted phrase node.
 */
export interface Phrase extends ParserTokenValue {
type: 'phrase'
// Text of the phrase.
value: string
// Quote character associated with the phrase: single or double quote.
quote: "'" | '"'
}
/**
 * A single bare word node.
 */
export interface Word extends ParserTokenValue {
type: 'word'
// Text of the word.
value: string
}
/**
 * A binary operator node with a left and a right operand.
 */
export interface Operator extends ParserTokenValue {
type: 'operator'
// Operator text as produced by the lexer.
value: string
// Left operand; typed `any`, so it may be undefined until a caller fills it in.
left: any
// Right operand; typed `any`.
right: any
}
/**
 * A group node holding a list of child nodes.
 */
export interface Group extends ParserTokenValue {
type: 'group'
// Child nodes of the group, in the order they were added; typed `any[]`.
children: any[]
}
/** Union of every concrete node kind the parser can produce, discriminated by `type`. */
export type ParserToken = Phrase | Word | Operator | Group
/**
 * Abstract contract for parsers that read from an `ILexer`.
 *
 * The base class only stores the lexer; position tracking and all parsing
 * operations are left to the concrete subclass.
 */
export abstract class IParser {
  /** Current position of the parser; maintained by the subclass. */
  public abstract index: number

  /**
   * @param lexer Lexer the parser reads from; exposed as the public `lexer` property.
   */
  constructor(public lexer: ILexer) {}

  /** Looks at a parser token relative to the current position; `null` when none is available. */
  public abstract peek(amount?: number): ParserToken | null

  /** Returns a parser token relative to the current position and advances; `null` when none is available. */
  public abstract consume(amount?: number): ParserToken | null

  /** Parses the input and returns the resulting list of parser tokens. */
  public abstract parse(): ParserToken[]

  /** Reports whether the end of input has been reached. */
  public abstract isEOF(): boolean
}
/**
 * Modes the parser can be in. Only a single mode, `default`, is defined.
 */
export enum ParserState {
  default = 0,
}
/**
 * Parser that builds `ParserToken` nodes (words, phrases, operators and
 * groups) from the token stream of an `ILexer`.
 *
 * Produced nodes are memoised in `cache`; `index` is the position in `cache`
 * of the next node that `consume()` will hand out.
 */
export class Parser extends IParser {
  /** Position in `cache` of the next parser token to be consumed. */
  index = 0
  /** Current parser mode; `ParserState.default` is the only mode handled. */
  state = ParserState.default
  /** Parser tokens produced so far, addressed by parser index. */
  cache: ParserToken[] = []

  /**
   * @param lexer Lexer to read tokens from.
   */
  constructor(lexer: ILexer) {
    super(lexer)
    this.state = ParserState.default
  }

  /**
   * Returns the parser token `amount` positions ahead of `index` without
   * moving `index`.
   *
   * @param amount Offset from the current position (default 0).
   * @returns The token at that position, or `null` when the input ends
   *          before that position can be filled.
   */
  public peek(amount = 0): ParserToken | null {
    const cacheIndex = this.index + amount
    if (cacheIndex < this.cache.length) {
      return this.cache[cacheIndex] ?? null
    }
    if (this.isEOF()) {
      return null
    }
    this.fillCache(cacheIndex)
    // The slot stays unfilled when the lexer ran dry first; report that as null.
    return this.cache[cacheIndex] ?? null
  }

  /**
   * Returns the parser token `amount` positions ahead of `index` and moves
   * `index` to just past it.
   *
   * @param amount Offset from the current position (default 0).
   * @returns The token at that position, or `null` when none is available.
   */
  public consume(amount = 0): ParserToken | null {
    const cacheIndex = this.index + amount
    this.index = cacheIndex + 1
    const cached = this.cache[cacheIndex]
    if (cached) {
      return cached
    }
    if (this.isEOF()) {
      return null
    }
    this.fillCache(cacheIndex)
    // The slot stays unfilled when the lexer ran dry first; report that as null.
    return this.cache[cacheIndex] ?? null
  }

  /**
   * Populates empty cache slots `0..n` by reading from the lexer, stopping
   * early when the lexer reports EOF.
   *
   * While a slot is being filled `index` is set to that slot, because
   * `readNextToken()` reads `cache[index - 1]`. The caller's `index` is
   * restored before returning, including on the early EOF exit.
   *
   * @param n Highest cache slot that should be filled.
   */
  private fillCache(n: number): void {
    const savedIndex = this.index
    for (let i = 0; i <= n; i++) {
      this.index = i
      if (this.isEOF()) {
        break
      }
      if (this.cache[i]) {
        continue
      }
      const value = this.readNextToken()
      // readNextToken() yields null for lexer tokens it does not recognise;
      // the null is stored unchanged.
      this.cache[i] = value!
    }
    this.index = savedIndex
  }

  /**
   * Consumes parser tokens until none remain and returns them in order.
   *
   * When an operator token has no `left` operand yet and at least one token
   * has already been collected, the most recently collected token is removed
   * from the result and becomes the operator's `left` operand.
   *
   * @returns The top-level parser tokens.
   */
  public parse(): ParserToken[] {
    const tokens: ParserToken[] = []
    // Tokens already cached by an earlier peek() must still be delivered even
    // if the lexer itself is exhausted, so the cache is checked as well as EOF.
    while (this.index < this.cache.length || !this.isEOF()) {
      const token = this.consume()
      if (!token) {
        return tokens
      }
      if (token.type === 'operator' && tokens.length && token.left === undefined) {
        token.left = tokens.pop()
      }
      tokens.push(token)
    }
    return tokens
  }

  /** Reports whether the underlying lexer has reached the end of its input. */
  public isEOF(): boolean {
    return this.lexer.isEOF()
  }

  /**
   * Reads one parser token from the lexer, using the next non-whitespace
   * lexer token as lookahead.
   *
   * - A current whitespace token is consumed and the read restarts.
   * - Lookahead is an operator: the current token becomes the left operand,
   *   the right operand is read recursively, and an operator node is built.
   * - Lookahead is a group token: see the inline comments for the three cases.
   * - Otherwise the current token is consumed and converted on its own.
   *
   * The order of lexer calls is significant and depends on the lexer's
   * peek/consume semantics, which are not visible in this file.
   *
   * @returns The parsed token, or `null` when the current lexer token is not
   *          a word, quote or group.
   * @throws Error with message 'Bad state' when `state` is not `ParserState.default`.
   */
  private readNextToken(): ParserToken | null {
    const token = this.lexer.peek()
    let nextToken = this.lexer.peek(1)
    switch (this.state) {
      case ParserState.default:
        if (token?.token === 'whitespace') {
          this.index++
          this.lexer.consume()
          return this.readNextToken()
        }
        nextToken = this.peekSkipWhitespace(nextToken)
        // lookahead
        switch (nextToken?.token) {
          case LexerToken.operator: {
            this.lexer.consume()
            const parsed = this.parseNormalLexToken(token!)!
            const nextParsed = this.readNextToken()!
            this.index++
            this.lexer.consume()
            return this.consumeOperator(parsed, nextToken, nextParsed)
          }
          case LexerToken.group:
            // Lookahead closes a group: convert the current token only.
            if (nextToken.value === ')') {
              return this.parseNormalLexToken(token)
            }
            // Current token is an operator: its left operand is the previously
            // cached parser token and its right operand is read recursively.
            if (token?.token === 'operator') {
              return this.consumeOperator(this.cache[this.index - 1], token, this.readNextToken()!)
            }
            // NOTE(review): on this path the current `token` is not converted
            // into a node; confirm this is intended given the lexer's semantics.
            this.index++
            this.lexer.consume()
            return this.consumeGroup(nextToken!)
        }
        // no special token coming up, proceed with this token
        this.lexer.consume()
        return this.parseNormalLexToken(token)
      default:
        throw new Error('Bad state')
    }
  }

  /**
   * While `nextToken` is a whitespace token, consumes from the lexer and
   * re-peeks the lexer's current token.
   *
   * Note that this advances the lexer itself (via `lexer.consume()`); it is
   * not a pure lookahead.
   *
   * @param nextToken Lookahead token to start from.
   * @returns The first non-whitespace token found, or `null` when there is none.
   */
  private peekSkipWhitespace(nextToken: LexerTokenValue | null) {
    while (nextToken && nextToken.token === 'whitespace') {
      this.lexer.consume()
      nextToken = this.lexer.peek()
    }
    return nextToken
  }

  /**
   * Converts a single lexer token into a parser token.
   *
   * @param token Lexer token to convert.
   * @returns A word, phrase or group node, or `null` for any other token kind
   *          (including a missing token).
   */
  private parseNormalLexToken(token: LexerTokenValue | null): ParserToken | null {
    switch (token?.token) {
      case LexerToken.word:
        return this.consumeWord(token)
      case LexerToken.quote: {
        // TODO might need to reconsider consuming here.
        // Two lexer tokens are consumed: the first supplies the phrase text,
        // the second is discarded (presumably the closing quote; verify
        // against the lexer).
        const quoteContent = this.lexer.consume()!
        this.lexer.consume()
        return this.consumePhrase(token, quoteContent)
      }
      case LexerToken.group:
        return this.consumeGroup(token!)
      default:
        return null
    }
  }

  /**
   * Builds a word node from a lexer token. Does not touch the lexer.
   *
   * @param word Lexer token whose `value` becomes the word text.
   */
  private consumeWord(word: LexerTokenValue): ParserToken | null {
    return { type: 'word', value: word.value }
  }

  /**
   * Builds a phrase node. Does not touch the lexer.
   *
   * @param token Lexer quote token; its `value` is used as the quote character.
   * @param quoteContent Lexer token whose `value` becomes the phrase text.
   */
  private consumePhrase(token: LexerTokenValue, quoteContent: LexerTokenValue): ParserToken | null {
    return { type: 'phrase', value: quoteContent.value, quote: token.value as Phrase['quote'] }
  }

  /**
   * Builds an operator node from its operands. Does not touch the lexer.
   *
   * @param left Left operand.
   * @param opToken Lexer operator token; its `value` becomes the operator text.
   * @param right Right operand.
   */
  private consumeOperator(
    left: ParserToken,
    opToken: LexerTokenValue,
    right: ParserToken,
  ): ParserToken | null {
    return { type: 'operator', value: opToken.value, left, right }
  }

  /**
   * Reads child tokens until the lexer's current token has the value ')' or
   * the lexer has no more tokens, then consumes one more lexer token and
   * returns the group node.
   *
   * @param token Lexer group token that triggered the group; not read here.
   * @returns A group node containing every non-null child that was read.
   */
  private consumeGroup(token: LexerTokenValue): ParserToken | null {
    const children: ParserToken[] = []
    let nextToken = this.peekSkipWhitespace(this.lexer.peek())
    while (nextToken && nextToken.value !== ')') {
      const child = this.readNextToken()
      if (child) {
        children.push(child)
      }
      nextToken = this.lexer.peek()
    }
    // Consume the token the loop stopped on (the ')' when one was found).
    this.lexer.consume()
    return { type: 'group', children }
  }
}