feat(tokenizer): update phrase

This commit is contained in:
Chen Asraf
2022-08-14 20:26:30 +03:00
parent 89ec5ebd80
commit 4ffd52714d
2 changed files with 80 additions and 22 deletions

View File

@@ -2,20 +2,74 @@ import { StringReader } from '../src/reader'
import { Token, Tokenizer } from '../src/tokenizer'
describe('Tokenizer', () => {
test('Should tokenize phrase', () => {
const reader = new StringReader('"phrase"')
const tokenizer = new Tokenizer(reader)
const token = tokenizer.peek()
expect(token.token).toBe(Token.phrase)
expect(token.value).toBe('"phrase"')
describe('Phrase', () => {
// A lone quoted word is expected to come back as three tokens:
// opening quote, the word itself, closing quote.
test('Should tokenize single', () => {
  const tokens = new Tokenizer(new StringReader('"phrase"')).read()
  const expected = [
    [Token.quote, '"'],
    [Token.word, 'phrase'],
    [Token.quote, '"'],
  ]
  // Kind is checked before text for each position, in order.
  expected.forEach(([kind, text], index) => {
    expect(tokens[index].token).toBe(kind)
    expect(tokens[index].value).toBe(text)
  })
})
// Everything between the quotes (spaces, digits and symbols included) is
// expected to arrive as one word token framed by two quote tokens.
test('Should tokenize multi', () => {
  const source = '"one two three 123 !@#"'
  const tokens = new Tokenizer(new StringReader(source)).read()

  const opening = tokens[0]
  const body = tokens[1]
  const closing = tokens[2]

  expect(opening.token).toBe(Token.quote)
  expect(opening.value).toBe('"')
  expect(body.token).toBe(Token.word)
  expect(body.value).toBe('one two three 123 !@#')
  expect(closing.token).toBe(Token.quote)
  expect(closing.value).toBe('"')
})
})
test('Should tokenize word', () => {
const reader = new StringReader('word')
const tokenizer = new Tokenizer(reader)
const token = tokenizer.peek()
expect(token.token).toBe(Token.word)
expect(token.value).toBe('word')
describe('Word', () => {
// A bare word with no surrounding whitespace should be the first token read.
test('Should tokenize single', () => {
  const first = new Tokenizer(new StringReader('word')).read()[0]
  expect(first.token).toBe(Token.word)
  expect(first.value).toBe('word')
})
// Outside of quotes, words and the single spaces between them are expected
// to alternate as separate word / whitespace tokens.
test('Should tokenize multi', () => {
  const tokens = new Tokenizer(new StringReader('one two three 123')).read()
  const expected = [
    [Token.word, 'one'],
    [Token.whitespace, ' '],
    [Token.word, 'two'],
    [Token.whitespace, ' '],
    [Token.word, 'three'],
    [Token.whitespace, ' '],
    [Token.word, '123'],
  ]
  // Kind is checked before text for each position, in order.
  expected.forEach(([kind, text], index) => {
    expect(tokens[index].token).toBe(kind)
    expect(tokens[index].value).toBe(text)
  })
})
})
describe('Groups', () => {

View File

@@ -3,16 +3,16 @@ import { InputReader } from './reader'
/**
 * Scanning modes of the tokenizer.
 *
 * This is a numeric enum; the ordinals below are the same values the
 * members would receive implicitly, written out for clarity.
 */
export enum TokenizerState {
  /** Scanning outside of a quoted phrase. */
  default = 0,
  /** Scanning between an opening quote and its matching terminator. */
  inPhrase = 1,
  // inGroup, (not enabled as a state)
}
/**
 * Kinds of token carried in the `token` field of a tokenizer result.
 *
 * String enum: each member's runtime value is its own name.
 *
 * NOTE(review): `phrase` and `eof` each appear here both as a live member and
 * as a commented-out line — confirm which form is intended before relying on
 * either member.
 */
export enum Token {
phrase = 'phrase',
// phrase = 'phrase',
group = 'group',
operator = 'operator',
word = 'word',
// A quote delimiter character.
quote = 'quote',
whitespace = 'whitespace',
eof = 'eof',
// eof = 'eof',
}
export interface TokenValue {
@@ -52,7 +52,7 @@ export class Tokenizer implements InputReader<TokenValue> {
if (`"'`.includes(nextChar)) {
this.state = TokenizerState.inPhrase
this.quoteTerminator = nextChar
return this.consumePhrase()
return this.consumeQuote()
}
if (this.isAlphanumeric(nextChar.charCodeAt(0))) {
@@ -74,21 +74,27 @@ export class Tokenizer implements InputReader<TokenValue> {
}
if (nextChar === '(' || nextChar === ')') {
// this.state = TokenizerState.inGroup
return this.consumeGroup()
}
return this.consumeWord()
case TokenizerState.inPhrase:
if (nextChar === this.quoteTerminator) {
this.state = TokenizerState.default
return this.consumePhrase()
return this.consumeQuote()
}
return this.consumeWord()
return this.consumePhrase()
default:
throw new Error('bad state')
}
}
/**
 * Consumes the next item from the underlying reader and wraps it as a
 * `Token.quote` token.
 *
 * The caller is expected to have already peeked a quote character; this
 * method does not check what it consumes.
 *
 * @returns the consumed text tagged with `Token.quote`.
 */
consumeQuote(): TokenValue {
  const quoteText = this.reader.consume()
  return { value: quoteText, token: Token.quote }
}
consumeAnd(): TokenValue {
let value = ''
if (this.confirmExactWord('AND')) {
@@ -157,11 +163,9 @@ export class Tokenizer implements InputReader<TokenValue> {
while ((nextChar = this.reader.peek()) && nextChar !== this.quoteTerminator) {
value += this.reader.consume()
}
value += nextChar
this.reader.consume()
return {
value,
token: Token.phrase,
token: Token.word,
}
}