fix(tokenizer): logical OR/AND group

This commit is contained in:
Chen Asraf
2022-08-14 03:33:58 +03:00
parent 139bc886f1
commit c1ed498fe7
4 changed files with 118 additions and 27 deletions

6
.gitignore vendored
View File

@@ -1,2 +1,4 @@
/coverage
/node_modules
# Coverage directory used by tools like istanbul
coverage
# Dependency directories
node_modules/

View File

@@ -18,19 +18,88 @@ describe('Tokenizer', () => {
expect(token.value).toBe('word')
})
test('Should tokenize group', () => {
const reader = new StringReader('(word)')
const tokenizer = new Tokenizer(reader)
let token = tokenizer.peek()
tokenizer.consume()
expect(token.token).toBe(Token.group)
expect(token.value).toBe('(')
token = tokenizer.peek()
tokenizer.consume()
expect(token.token).toBe(Token.word)
expect(token.value).toBe('word')
token = tokenizer.peek()
expect(token.token).toBe(Token.group)
expect(token.value).toBe(')')
describe('Groups', () => {
  // One expected entry in the token stream: [token kind, token value].
  type ExpectedToken = [Token, string]

  /**
   * Tokenizes `input` through a fresh StringReader/Tokenizer pair and asserts
   * that successive `consume()` calls yield `expected`, in order. For each
   * token the kind is checked first, then the value.
   */
  const expectTokens = (input: string, expected: ExpectedToken[]): void => {
    const tokenizer = new Tokenizer(new StringReader(input))
    for (const [kind, value] of expected) {
      const token = tokenizer.consume()
      expect(token.token).toBe(kind)
      expect(token.value).toBe(value)
    }
  }

  /**
   * Expected stream for `(word <OP> word)`; `operator` is the value the
   * tokenizer is expected to report for the operator token.
   */
  const binaryGroup = (operator: string): ExpectedToken[] => [
    [Token.group, '('],
    [Token.word, 'word'],
    [Token.whitespace, ' '],
    [Token.operator, operator],
    [Token.whitespace, ' '],
    [Token.word, 'word'],
    [Token.group, ')'],
  ]

  it('Should tokenize single-word group', () => {
    expectTokens('(word)', [
      [Token.group, '('],
      [Token.word, 'word'],
      [Token.group, ')'],
    ])
  })

  it('Should tokenize logical operator OR group', () => {
    // The uppercase input operator is expected back as lowercase 'or'.
    expectTokens('(word OR word)', binaryGroup('or'))
  })

  it('Should tokenize logical operator AND group', () => {
    // The uppercase input operator is expected back as lowercase 'and'.
    expectTokens('(word AND word)', binaryGroup('and'))
  })
})
})

View File

@@ -2,6 +2,7 @@ export abstract class InputReader<T> {
/**
 * Returns a `T` for an optional numeric argument `n`.
 * In the `StringReader` implementation `n` is an offset from the current
 * index and the index is left unchanged.
 */
public abstract peek(n?: number): T
/**
 * Returns a `T` for an optional numeric argument `n`.
 * In the `StringReader` implementation the index is incremented by one on
 * every call.
 */
public abstract consume(n?: number): T
/** Repositions the reader; the visible implementations assign `n` to `index`. */
public abstract setIndex(n: number): void
/** Reports whether the reader has reached the end of its input. */
public abstract isEOF(): boolean
/** Current position. Declared with `!` because this class never initializes it itself. */
public index!: number
}
@@ -27,6 +28,10 @@ export class BufferReader implements InputReader<string> {
/**
 * Sets the reader's current index.
 *
 * @param n new index; assigned as-is, with no bounds check.
 */
public setIndex(n: number): void {
this.index = n
}
/**
 * Reports whether the read position has reached or passed the end of the buffer.
 *
 * @returns `true` when `index` is greater than or equal to `buffer.length`.
 */
public isEOF(): boolean {
  const end = this.buffer.length
  return this.index >= end
}
}
export class StringReader implements InputReader<string> {
@@ -38,11 +43,11 @@ export class StringReader implements InputReader<string> {
}
/**
 * Returns a substring of the underlying string starting at `index + n`,
 * without modifying `index`.
 *
 * @param n offset from the current index; defaults to 0.
 */
public peek(n = 0): string {
// NOTE(review): two `return` statements appear back to back here; presumably
// these are the pre-change and post-change lines of the diff shown together.
// With the end bound `this.index + 1` the range is empty when n is 1.
return this.string.substring(this.index + n, this.index + 1)
// With the end bound `this.index + n + 1` the range is at most one character
// wide, starting at offset n.
return this.string.substring(this.index + n, this.index + n + 1)
}
/**
 * Reads a substring starting at `index + n`, then advances `index` by one.
 *
 * @param n offset from the current index; defaults to 0.
 * @returns the substring read before `index` was incremented.
 */
public consume(n = 0): string {
// NOTE(review): `result` is declared twice; presumably these are the pre-change
// and post-change lines of the diff shown together. The first uses the end bound
// `this.index + 1` (empty range when n is 1); the second uses `this.index + n + 1`
// (at most one character at offset n).
const result = this.string.substring(this.index + n, this.index + 1)
const result = this.string.substring(this.index + n, this.index + n + 1)
// The index moves forward by exactly one, regardless of `n`.
this.index++
return result
}
@@ -50,4 +55,8 @@ export class StringReader implements InputReader<string> {
/**
 * Sets the reader's current index.
 *
 * @param n new index; assigned as-is, with no bounds check.
 */
public setIndex(n: number): void {
this.index = n
}
/**
 * Reports whether the read position has reached or passed the end of the string.
 *
 * @returns `true` when `index` is greater than or equal to `string.length`.
 */
public isEOF(): boolean {
  const end = this.string.length
  return this.index >= end
}
}

View File

@@ -3,7 +3,7 @@ import { InputReader } from './reader'
/** Values held in the tokenizer's `state` field and switched on when reading the next token. */
export enum TokenizerState {
default,
inPhrase,
// NOTE(review): `inGroup` appears both as a live member and commented out on the
// next line; presumably these are the two sides of the diff. Confirm which is current.
inGroup,
// inGroup,
}
export enum Token {
@@ -31,6 +31,10 @@ export class Tokenizer implements InputReader<TokenValue> {
this.reader = reader
}
/**
 * Reports end of input by delegating to the underlying reader's `isEOF()`.
 *
 * @returns whatever `this.reader.isEOF()` returns.
 */
public isEOF(): boolean {
return this.reader.isEOF()
}
/**
 * Sets this tokenizer's own `index` field.
 *
 * NOTE(review): the visible code does not reposition `this.reader`; confirm
 * whether the reader's index is meant to stay independent.
 *
 * @param n new index; assigned as-is.
 */
public setIndex(n: number): void {
this.index = n
}
@@ -65,8 +69,8 @@ export class Tokenizer implements InputReader<TokenValue> {
return this.consumeWord()
}
if (nextChar === '(') {
this.state = TokenizerState.inGroup
if (nextChar === '(' || nextChar === ')') {
// this.state = TokenizerState.inGroup
return this.consumeGroup()
}
return this.consumeWord()
@@ -76,23 +80,22 @@ export class Tokenizer implements InputReader<TokenValue> {
return this.consumePhrase()
}
return this.consumeWord()
case TokenizerState.inGroup:
if (nextChar === ')') {
this.state = TokenizerState.default
return this.consumeGroup()
}
return this.consumeWord()
default:
throw new Error('bad state')
}
}
/**
 * Advances the underlying reader past an `and` operator and returns the
 * matching operator token.
 *
 * The reader's `consume()` is called three times and the results are
 * discarded; the returned value is always the lowercase literal `'and'`.
 *
 * @returns an operator token whose value is `'and'`.
 */
consumeAnd(): TokenValue {
  const keywordLength = 3
  for (let consumed = 0; consumed < keywordLength; consumed += 1) {
    this.reader.consume()
  }
  const operatorToken: TokenValue = { value: 'and', token: Token.operator }
  return operatorToken
}
consumeOr(): TokenValue {
this.reader.consume()
this.reader.consume()
return {
value: 'or',
token: Token.operator,
@@ -155,6 +158,14 @@ export class Tokenizer implements InputReader<TokenValue> {
return token
}
/**
 * Drains the tokenizer by calling `consume()` until `isEOF()` reports true.
 *
 * @returns the consumed tokens, in the order they were produced.
 */
public read(): TokenValue[] {
  const collected: TokenValue[] = []
  for (;;) {
    // Check for end of input before every consume, including the first.
    if (this.isEOF()) {
      return collected
    }
    collected.push(this.consume())
  }
}
// public read() {
// throw new Error('Method not implemented.')
// }