mirror of
https://github.com/chenasraf/search-ast-parser-js.git
synced 2026-05-17 17:48:09 +00:00
feat(tokenizer): update phrase
This commit is contained in:
@@ -2,20 +2,74 @@ import { StringReader } from '../src/reader'
|
||||
import { Token, Tokenizer } from '../src/tokenizer'
|
||||
|
||||
describe('Tokenizer', () => {
|
||||
test('Should tokenize phrase', () => {
|
||||
const reader = new StringReader('"phrase"')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const token = tokenizer.peek()
|
||||
expect(token.token).toBe(Token.phrase)
|
||||
expect(token.value).toBe('"phrase"')
|
||||
describe('Phrase', () => {
|
||||
test('Should tokenize single', () => {
|
||||
const reader = new StringReader('"phrase"')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const tokens = tokenizer.read()
|
||||
|
||||
expect(tokens[0].token).toBe(Token.quote)
|
||||
expect(tokens[0].value).toBe('"')
|
||||
|
||||
expect(tokens[1].token).toBe(Token.word)
|
||||
expect(tokens[1].value).toBe('phrase')
|
||||
|
||||
expect(tokens[2].token).toBe(Token.quote)
|
||||
expect(tokens[2].value).toBe('"')
|
||||
})
|
||||
|
||||
test('Should tokenize multi', () => {
|
||||
const reader = new StringReader('"one two three 123 !@#"')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const tokens = tokenizer.read()
|
||||
|
||||
expect(tokens[0].token).toBe(Token.quote)
|
||||
expect(tokens[0].value).toBe('"')
|
||||
|
||||
expect(tokens[1].token).toBe(Token.word)
|
||||
expect(tokens[1].value).toBe('one two three 123 !@#')
|
||||
|
||||
expect(tokens[2].token).toBe(Token.quote)
|
||||
expect(tokens[2].value).toBe('"')
|
||||
})
|
||||
})
|
||||
|
||||
test('Should tokenize word', () => {
|
||||
const reader = new StringReader('word')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const token = tokenizer.peek()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
describe('Word', () => {
|
||||
test('Should tokenize single', () => {
|
||||
const reader = new StringReader('word')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const tokens = tokenizer.read()
|
||||
|
||||
expect(tokens[0].token).toBe(Token.word)
|
||||
expect(tokens[0].value).toBe('word')
|
||||
})
|
||||
|
||||
test('Should tokenize multi', () => {
|
||||
const reader = new StringReader('one two three 123')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
const tokens = tokenizer.read()
|
||||
|
||||
expect(tokens[0].token).toBe(Token.word)
|
||||
expect(tokens[0].value).toBe('one')
|
||||
|
||||
expect(tokens[1].token).toBe(Token.whitespace)
|
||||
expect(tokens[1].value).toBe(' ')
|
||||
|
||||
expect(tokens[2].token).toBe(Token.word)
|
||||
expect(tokens[2].value).toBe('two')
|
||||
|
||||
expect(tokens[3].token).toBe(Token.whitespace)
|
||||
expect(tokens[3].value).toBe(' ')
|
||||
|
||||
expect(tokens[4].token).toBe(Token.word)
|
||||
expect(tokens[4].value).toBe('three')
|
||||
|
||||
expect(tokens[5].token).toBe(Token.whitespace)
|
||||
expect(tokens[5].value).toBe(' ')
|
||||
|
||||
expect(tokens[6].token).toBe(Token.word)
|
||||
expect(tokens[6].value).toBe('123')
|
||||
})
|
||||
})
|
||||
|
||||
describe('Groups', () => {
|
||||
|
||||
@@ -3,16 +3,16 @@ import { InputReader } from './reader'
|
||||
export enum TokenizerState {
|
||||
default,
|
||||
inPhrase,
|
||||
// inGroup,
|
||||
}
|
||||
|
||||
export enum Token {
|
||||
phrase = 'phrase',
|
||||
// phrase = 'phrase',
|
||||
group = 'group',
|
||||
operator = 'operator',
|
||||
word = 'word',
|
||||
quote = 'quote',
|
||||
whitespace = 'whitespace',
|
||||
eof = 'eof',
|
||||
// eof = 'eof',
|
||||
}
|
||||
|
||||
export interface TokenValue {
|
||||
@@ -52,7 +52,7 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
if (`"'`.includes(nextChar)) {
|
||||
this.state = TokenizerState.inPhrase
|
||||
this.quoteTerminator = nextChar
|
||||
return this.consumePhrase()
|
||||
return this.consumeQuote()
|
||||
}
|
||||
|
||||
if (this.isAlphanumeric(nextChar.charCodeAt(0))) {
|
||||
@@ -74,21 +74,27 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
}
|
||||
|
||||
if (nextChar === '(' || nextChar === ')') {
|
||||
// this.state = TokenizerState.inGroup
|
||||
return this.consumeGroup()
|
||||
}
|
||||
return this.consumeWord()
|
||||
case TokenizerState.inPhrase:
|
||||
if (nextChar === this.quoteTerminator) {
|
||||
this.state = TokenizerState.default
|
||||
return this.consumePhrase()
|
||||
return this.consumeQuote()
|
||||
}
|
||||
return this.consumeWord()
|
||||
return this.consumePhrase()
|
||||
default:
|
||||
throw new Error('bad state')
|
||||
}
|
||||
}
|
||||
|
||||
consumeQuote(): TokenValue {
|
||||
return {
|
||||
value: this.reader.consume(),
|
||||
token: Token.quote,
|
||||
}
|
||||
}
|
||||
|
||||
consumeAnd(): TokenValue {
|
||||
let value = ''
|
||||
if (this.confirmExactWord('AND')) {
|
||||
@@ -157,11 +163,9 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
while ((nextChar = this.reader.peek()) && nextChar !== this.quoteTerminator) {
|
||||
value += this.reader.consume()
|
||||
}
|
||||
value += nextChar
|
||||
this.reader.consume()
|
||||
return {
|
||||
value,
|
||||
token: Token.phrase,
|
||||
token: Token.word,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user