feat: add & and | logical operators

This commit is contained in:
Chen Asraf
2022-08-14 15:17:10 +03:00
parent c1ed498fe7
commit 89ec5ebd80
2 changed files with 156 additions and 75 deletions

View File

@@ -19,87 +19,130 @@ describe('Tokenizer', () => {
})
describe('Groups', () => {
// A parenthesised single word must produce exactly this sequence at the
// start of the token list: opening group, word, closing group.
// All tokens are read up front with `tokenizer.read()` and checked by index.
test('Should tokenize single-word group', () => {
  const reader = new StringReader('(word)')
  const tokenizer = new Tokenizer(reader)
  const tokens = tokenizer.read()

  expect(tokens[0].token).toBe(Token.group)
  expect(tokens[0].value).toBe('(')

  expect(tokens[1].token).toBe(Token.word)
  expect(tokens[1].value).toBe('word')

  expect(tokens[2].token).toBe(Token.group)
  expect(tokens[2].value).toBe(')')
})
// Both spellings of the OR operator are tokenized as `Token.operator`;
// the token value records which spelling was used ('or' for the keyword,
// '|' for the symbol). Surrounding spaces are separate whitespace tokens.
describe('logical operator OR group', () => {
  test('should parse OR separator', () => {
    const reader = new StringReader('(word OR word)')
    const tokenizer = new Tokenizer(reader)
    const tokens = tokenizer.read()

    expect(tokens[0].token).toBe(Token.group)
    expect(tokens[0].value).toBe('(')

    expect(tokens[1].token).toBe(Token.word)
    expect(tokens[1].value).toBe('word')

    expect(tokens[2].token).toBe(Token.whitespace)
    expect(tokens[2].value).toBe(' ')

    // The keyword spelling is reported in lower case.
    expect(tokens[3].token).toBe(Token.operator)
    expect(tokens[3].value).toBe('or')

    expect(tokens[4].token).toBe(Token.whitespace)
    expect(tokens[4].value).toBe(' ')

    expect(tokens[5].token).toBe(Token.word)
    expect(tokens[5].value).toBe('word')

    expect(tokens[6].token).toBe(Token.group)
    expect(tokens[6].value).toBe(')')
  })

  test('should parse | separator', () => {
    const reader = new StringReader('(word | word)')
    const tokenizer = new Tokenizer(reader)
    const tokens = tokenizer.read()

    expect(tokens[0].token).toBe(Token.group)
    expect(tokens[0].value).toBe('(')

    expect(tokens[1].token).toBe(Token.word)
    expect(tokens[1].value).toBe('word')

    expect(tokens[2].token).toBe(Token.whitespace)
    expect(tokens[2].value).toBe(' ')

    // The symbol spelling is reported verbatim.
    expect(tokens[3].token).toBe(Token.operator)
    expect(tokens[3].value).toBe('|')

    expect(tokens[4].token).toBe(Token.whitespace)
    expect(tokens[4].value).toBe(' ')

    expect(tokens[5].token).toBe(Token.word)
    expect(tokens[5].value).toBe('word')

    expect(tokens[6].token).toBe(Token.group)
    expect(tokens[6].value).toBe(')')
  })
})
// Both spellings of the AND operator are tokenized as `Token.operator`;
// the token value records which spelling was used ('and' for the keyword,
// '&' for the symbol). Surrounding spaces are separate whitespace tokens.
describe('logical operator AND group', () => {
  test('should parse AND separator', () => {
    const reader = new StringReader('(word AND word)')
    const tokenizer = new Tokenizer(reader)
    const tokens = tokenizer.read()

    expect(tokens[0].token).toBe(Token.group)
    expect(tokens[0].value).toBe('(')

    expect(tokens[1].token).toBe(Token.word)
    expect(tokens[1].value).toBe('word')

    expect(tokens[2].token).toBe(Token.whitespace)
    expect(tokens[2].value).toBe(' ')

    // The keyword spelling is reported in lower case.
    expect(tokens[3].token).toBe(Token.operator)
    expect(tokens[3].value).toBe('and')

    expect(tokens[4].token).toBe(Token.whitespace)
    expect(tokens[4].value).toBe(' ')

    expect(tokens[5].token).toBe(Token.word)
    expect(tokens[5].value).toBe('word')

    expect(tokens[6].token).toBe(Token.group)
    expect(tokens[6].value).toBe(')')
  })

  test('should parse & separator', () => {
    const reader = new StringReader('(word & word)')
    const tokenizer = new Tokenizer(reader)
    const tokens = tokenizer.read()

    expect(tokens[0].token).toBe(Token.group)
    expect(tokens[0].value).toBe('(')

    expect(tokens[1].token).toBe(Token.word)
    expect(tokens[1].value).toBe('word')

    expect(tokens[2].token).toBe(Token.whitespace)
    expect(tokens[2].value).toBe(' ')

    // The symbol spelling is reported verbatim.
    expect(tokens[3].token).toBe(Token.operator)
    expect(tokens[3].value).toBe('&')

    expect(tokens[4].token).toBe(Token.whitespace)
    expect(tokens[4].value).toBe(' ')

    expect(tokens[5].token).toBe(Token.word)
    expect(tokens[5].value).toBe('word')

    expect(tokens[6].token).toBe(Token.group)
    expect(tokens[6].value).toBe(')')
  })
})
})
})

View File

@@ -56,19 +56,23 @@ export class Tokenizer implements InputReader<TokenValue> {
}
if (this.isAlphanumeric(nextChar.charCodeAt(0))) {
if (nextChar.toUpperCase() === 'O' && this.reader.peek(1) === 'R') {
if (this.confirmExactWord('OR')) {
return this.consumeOr()
}
if (
nextChar.toUpperCase() === 'A' &&
this.reader.peek(1) === 'N' &&
this.reader.peek(2) === 'D'
) {
if (this.confirmExactWord('AND')) {
return this.consumeAnd()
}
return this.consumeWord()
}
if (nextChar === '|') {
return this.consumeOr()
}
if (nextChar === '&') {
return this.consumeAnd()
}
if (nextChar === '(' || nextChar === ')') {
// this.state = TokenizerState.inGroup
return this.consumeGroup()
@@ -84,24 +88,62 @@ export class Tokenizer implements InputReader<TokenValue> {
throw new Error('bad state')
}
}
/**
 * Consumes a logical-AND operator from the reader and returns its token.
 *
 * Two spellings are recognised, checked in this order:
 *  - the keyword `AND`, reported with value `'and'`
 *  - the symbol `&`, reported with value `'&'`
 *
 * The lookahead must happen before anything is consumed; consuming first
 * would move the reader past the very characters being checked.
 *
 * @returns An operator token whose `value` identifies the spelling found.
 *   If neither spelling is next in the reader, nothing is consumed and
 *   `value` is the empty string.
 */
consumeAnd(): TokenValue {
  let value = ''
  if (this.confirmExactWord('AND')) {
    this.consumeExactWord('AND')
    value = 'and'
  } else if (this.confirmExactWord('&')) {
    this.consumeExactWord('&')
    value = '&'
  }
  return {
    value,
    token: Token.operator,
  }
}
/**
 * Consumes a logical-OR operator from the reader and returns its token.
 *
 * Two spellings are recognised, checked in this order:
 *  - the keyword `OR`, reported with value `'or'`
 *  - the symbol `|`, reported with value `'|'`
 *
 * The lookahead must happen before anything is consumed; consuming first
 * would move the reader past the very characters being checked.
 *
 * @returns An operator token whose `value` identifies the spelling found.
 *   If neither spelling is next in the reader, nothing is consumed and
 *   `value` is the empty string.
 */
consumeOr(): TokenValue {
  let value = ''
  if (this.confirmExactWord('OR')) {
    this.consumeExactWord('OR')
    value = 'or'
  } else if (this.confirmExactWord('|')) {
    this.consumeExactWord('|')
    value = '|'
  }
  return {
    value,
    token: Token.operator,
  }
}
/**
 * Looks ahead in the reader (via `peek` only) and reports whether the
 * upcoming characters spell out `word`.
 *
 * The comparison is strict (`!==`), so it is case-sensitive. Only
 * `word.length` characters are compared: whatever follows the match is not
 * inspected, so this is a prefix match rather than a word-boundary match.
 *
 * @param word - The literal text expected next in the reader.
 * @returns `true` when every character of `word` matches; `false` at the
 *   first mismatch. An empty `word` yields `true`.
 */
confirmExactWord(word: string) {
  let lookahead = this.reader.peek()
  let offset = 0
  while (offset < word.length) {
    if (lookahead !== word[offset]) {
      return false
    }
    offset += 1
    // Fetch the character for the next comparison. After the final match
    // this reads one position past the word; that value is never used.
    lookahead = this.reader.peek(offset)
  }
  return true
}
/**
 * Advances the reader past `word`, but only if `word` is what comes next.
 *
 * @param word - The literal text that must be next in the reader.
 * @throws Error when `confirmExactWord(word)` reports no match; the reader
 *   is left untouched in that case.
 */
consumeExactWord(word: string) {
  if (!this.confirmExactWord(word)) {
    throw new Error("Can't find exact word: " + word)
  }
  // One reader step per character of the matched word.
  this.consumeReader(word.length)
}
/**
 * Calls `reader.consume()` repeatedly, discarding whatever it returns.
 *
 * @param times - How many times to call `consume()`; defaults to 1.
 *   Zero or a negative count results in no calls.
 */
consumeReader(times = 1) {
  let remaining = times
  while (remaining > 0) {
    this.reader.consume()
    remaining -= 1
  }
}
private consumeGroup(): TokenValue {
return {
value: this.reader.consume(),
@@ -166,10 +208,6 @@ export class Tokenizer implements InputReader<TokenValue> {
return tokens
}
// public read() {
// throw new Error('Method not implemented.')
// }
/**
 * Reports whether `nextChar` is one of: space, tab, line feed, carriage
 * return.
 *
 * Implemented as a substring test against the string of those four
 * characters, so an empty `nextChar` also yields `true`.
 * NOTE(review): confirm callers never pass an empty string (e.g. at end of
 * input) where that result would be unwanted.
 *
 * @param nextChar - The character to classify.
 */
private isWhitespace(nextChar: string) {
  const whitespaceChars = ' \t\n\r'
  return whitespaceChars.includes(nextChar)
}