mirror of
https://github.com/chenasraf/search-ast-parser-js.git
synced 2026-05-17 17:48:09 +00:00
fix(tokenizer): logical OR/AND group
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
/coverage
|
||||
/node_modules
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
|
||||
@@ -18,19 +18,88 @@ describe('Tokenizer', () => {
|
||||
expect(token.value).toBe('word')
|
||||
})
|
||||
|
||||
test('Should tokenize group', () => {
|
||||
const reader = new StringReader('(word)')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
let token = tokenizer.peek()
|
||||
tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe('(')
|
||||
token = tokenizer.peek()
|
||||
tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
token = tokenizer.peek()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe(')')
|
||||
describe('Groups', () => {
|
||||
it('Should tokenize single-word group', () => {
|
||||
const reader = new StringReader('(word)')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
|
||||
let token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe('(')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe(')')
|
||||
})
|
||||
|
||||
it('Should tokenize logical operator OR group', () => {
|
||||
const reader = new StringReader('(word OR word)')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
|
||||
let token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe('(')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.whitespace)
|
||||
expect(token.value).toBe(' ')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.operator)
|
||||
expect(token.value).toBe('or')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.whitespace)
|
||||
expect(token.value).toBe(' ')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe(')')
|
||||
})
|
||||
|
||||
it('Should tokenize logical operator AND group', () => {
|
||||
const reader = new StringReader('(word AND word)')
|
||||
const tokenizer = new Tokenizer(reader)
|
||||
|
||||
let token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe('(')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.whitespace)
|
||||
expect(token.value).toBe(' ')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.operator)
|
||||
expect(token.value).toBe('and')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.whitespace)
|
||||
expect(token.value).toBe(' ')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.word)
|
||||
expect(token.value).toBe('word')
|
||||
|
||||
token = tokenizer.consume()
|
||||
expect(token.token).toBe(Token.group)
|
||||
expect(token.value).toBe(')')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -2,6 +2,7 @@ export abstract class InputReader<T> {
|
||||
public abstract peek(n?: number): T
|
||||
public abstract consume(n?: number): T
|
||||
public abstract setIndex(n: number): void
|
||||
public abstract isEOF(): boolean
|
||||
public index!: number
|
||||
}
|
||||
|
||||
@@ -27,6 +28,10 @@ export class BufferReader implements InputReader<string> {
|
||||
/**
 * Moves this reader's position to `n`.
 *
 * The value is stored as-is; no bounds check is performed here.
 *
 * @param n - the new value for `this.index`
 */
public setIndex(n: number): void {
|
||||
this.index = n
|
||||
}
|
||||
|
||||
/**
 * Reports whether the reader has run out of input.
 *
 * @returns `true` once `this.index` is at or beyond `this.buffer.length`
 */
public isEOF(): boolean {
|
||||
return this.index >= this.buffer.length
|
||||
}
|
||||
}
|
||||
|
||||
export class StringReader implements InputReader<string> {
|
||||
@@ -38,11 +43,11 @@ export class StringReader implements InputReader<string> {
|
||||
}
|
||||
|
||||
public peek(n = 0): string {
|
||||
return this.string.substring(this.index + n, this.index + 1)
|
||||
return this.string.substring(this.index + n, this.index + n + 1)
|
||||
}
|
||||
|
||||
public consume(n = 0): string {
|
||||
const result = this.string.substring(this.index + n, this.index + 1)
|
||||
const result = this.string.substring(this.index + n, this.index + n + 1)
|
||||
this.index++
|
||||
return result
|
||||
}
|
||||
@@ -50,4 +55,8 @@ export class StringReader implements InputReader<string> {
|
||||
/**
 * Moves this reader's position to `n`.
 *
 * The value is stored as-is; no bounds check is performed here.
 *
 * @param n - the new value for `this.index`
 */
public setIndex(n: number): void {
|
||||
this.index = n
|
||||
}
|
||||
|
||||
/**
 * Reports whether the reader has run out of input.
 *
 * @returns `true` once `this.index` is at or beyond `this.string.length`
 */
public isEOF(): boolean {
|
||||
return this.index >= this.string.length
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ import { InputReader } from './reader'
|
||||
export enum TokenizerState {
|
||||
default,
|
||||
inPhrase,
|
||||
inGroup,
|
||||
// inGroup,
|
||||
}
|
||||
|
||||
export enum Token {
|
||||
@@ -31,6 +31,10 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
this.reader = reader
|
||||
}
|
||||
|
||||
/**
 * Reports whether there is any input left to tokenize.
 *
 * Delegates directly to the wrapped reader's `isEOF()`.
 *
 * @returns whatever `this.reader.isEOF()` returns
 */
public isEOF(): boolean {
|
||||
return this.reader.isEOF()
|
||||
}
|
||||
|
||||
/**
 * Sets the tokenizer's own `index` field to `n`.
 *
 * NOTE(review): only `this.index` is assigned; `this.reader` is not
 * repositioned by this method — confirm that is intended.
 *
 * @param n - the new value for `this.index`
 */
public setIndex(n: number): void {
|
||||
this.index = n
|
||||
}
|
||||
@@ -65,8 +69,8 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
return this.consumeWord()
|
||||
}
|
||||
|
||||
if (nextChar === '(') {
|
||||
this.state = TokenizerState.inGroup
|
||||
if (nextChar === '(' || nextChar === ')') {
|
||||
// this.state = TokenizerState.inGroup
|
||||
return this.consumeGroup()
|
||||
}
|
||||
return this.consumeWord()
|
||||
@@ -76,23 +80,22 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
return this.consumePhrase()
|
||||
}
|
||||
return this.consumeWord()
|
||||
case TokenizerState.inGroup:
|
||||
if (nextChar === ')') {
|
||||
this.state = TokenizerState.default
|
||||
return this.consumeGroup()
|
||||
}
|
||||
return this.consumeWord()
|
||||
default:
|
||||
throw new Error('bad state')
|
||||
}
|
||||
}
|
||||
/**
 * Consumes three items from the underlying reader and emits an `and`
 * operator token.
 *
 * The consumed characters are discarded without being inspected —
 * presumably the caller has already matched the three-letter operator
 * before calling this (TODO confirm against the call site).
 *
 * @returns a token with `value: 'and'` and `token: Token.operator`
 */
consumeAnd(): TokenValue {
|
||||
this.reader.consume()
|
||||
this.reader.consume()
|
||||
this.reader.consume()
|
||||
return {
|
||||
value: 'and',
|
||||
token: Token.operator,
|
||||
}
|
||||
}
|
||||
consumeOr(): TokenValue {
|
||||
this.reader.consume()
|
||||
this.reader.consume()
|
||||
return {
|
||||
value: 'or',
|
||||
token: Token.operator,
|
||||
@@ -155,6 +158,14 @@ export class Tokenizer implements InputReader<TokenValue> {
|
||||
return token
|
||||
}
|
||||
|
||||
/**
 * Tokenizes all remaining input.
 *
 * Calls `consume()` repeatedly until `isEOF()` returns `true`, collecting
 * each returned token in order.
 *
 * @returns the collected tokens; an empty array if `isEOF()` is already
 * `true` on entry
 */
public read(): TokenValue[] {
|
||||
const tokens: TokenValue[] = []
|
||||
while (!this.isEOF()) {
|
||||
tokens.push(this.consume())
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
// public read() {
|
||||
// throw new Error('Method not implemented.')
|
||||
// }
|
||||
|
||||
Reference in New Issue
Block a user