mirror of
https://github.com/chenasraf/search-ast-parser-js.git
synced 2026-05-17 17:48:09 +00:00
feat: add & and | logical operators
This commit is contained in:
@@ -19,87 +19,130 @@ describe('Tokenizer', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
describe('Groups', () => {
|
describe('Groups', () => {
|
||||||
it('Should tokenize single-word group', () => {
|
test('Should tokenize single-word group', () => {
|
||||||
const reader = new StringReader('(word)')
|
const reader = new StringReader('(word)')
|
||||||
const tokenizer = new Tokenizer(reader)
|
const tokenizer = new Tokenizer(reader)
|
||||||
|
const tokens = tokenizer.read()
|
||||||
|
|
||||||
let token = tokenizer.consume()
|
expect(tokens[0].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[0].value).toBe('(')
|
||||||
expect(token.value).toBe('(')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[1].token).toBe(Token.word)
|
||||||
expect(token.token).toBe(Token.word)
|
expect(tokens[1].value).toBe('word')
|
||||||
expect(token.value).toBe('word')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[2].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[2].value).toBe(')')
|
||||||
expect(token.value).toBe(')')
|
|
||||||
})
|
})
|
||||||
|
|
||||||
it('Should tokenize logical operator OR group', () => {
|
describe('logical operator OR group', () => {
|
||||||
const reader = new StringReader('(word OR word)')
|
test('should parse OR separator', () => {
|
||||||
const tokenizer = new Tokenizer(reader)
|
const reader = new StringReader('(word OR word)')
|
||||||
|
const tokenizer = new Tokenizer(reader)
|
||||||
|
const tokens = tokenizer.read()
|
||||||
|
|
||||||
let token = tokenizer.consume()
|
expect(tokens[0].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[0].value).toBe('(')
|
||||||
expect(token.value).toBe('(')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[1].token).toBe(Token.word)
|
||||||
expect(token.token).toBe(Token.word)
|
expect(tokens[1].value).toBe('word')
|
||||||
expect(token.value).toBe('word')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[2].token).toBe(Token.whitespace)
|
||||||
expect(token.token).toBe(Token.whitespace)
|
expect(tokens[2].value).toBe(' ')
|
||||||
expect(token.value).toBe(' ')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[3].token).toBe(Token.operator)
|
||||||
expect(token.token).toBe(Token.operator)
|
expect(tokens[3].value).toBe('or')
|
||||||
expect(token.value).toBe('or')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[4].token).toBe(Token.whitespace)
|
||||||
expect(token.token).toBe(Token.whitespace)
|
expect(tokens[4].value).toBe(' ')
|
||||||
expect(token.value).toBe(' ')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[5].token).toBe(Token.word)
|
||||||
expect(token.token).toBe(Token.word)
|
expect(tokens[5].value).toBe('word')
|
||||||
expect(token.value).toBe('word')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[6].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[6].value).toBe(')')
|
||||||
expect(token.value).toBe(')')
|
})
|
||||||
|
|
||||||
|
test('should parse | separator', () => {
|
||||||
|
const reader = new StringReader('(word | word)')
|
||||||
|
const tokenizer = new Tokenizer(reader)
|
||||||
|
const tokens = tokenizer.read()
|
||||||
|
|
||||||
|
expect(tokens[0].token).toBe(Token.group)
|
||||||
|
expect(tokens[0].value).toBe('(')
|
||||||
|
|
||||||
|
expect(tokens[1].token).toBe(Token.word)
|
||||||
|
expect(tokens[1].value).toBe('word')
|
||||||
|
|
||||||
|
expect(tokens[2].token).toBe(Token.whitespace)
|
||||||
|
expect(tokens[2].value).toBe(' ')
|
||||||
|
|
||||||
|
expect(tokens[3].token).toBe(Token.operator)
|
||||||
|
expect(tokens[3].value).toBe('|')
|
||||||
|
|
||||||
|
expect(tokens[4].token).toBe(Token.whitespace)
|
||||||
|
expect(tokens[4].value).toBe(' ')
|
||||||
|
|
||||||
|
expect(tokens[5].token).toBe(Token.word)
|
||||||
|
expect(tokens[5].value).toBe('word')
|
||||||
|
|
||||||
|
expect(tokens[6].token).toBe(Token.group)
|
||||||
|
expect(tokens[6].value).toBe(')')
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
it('Should tokenize logical operator AND group', () => {
|
describe('logical operator AND group', () => {
|
||||||
const reader = new StringReader('(word AND word)')
|
test('should parse AND separator', () => {
|
||||||
const tokenizer = new Tokenizer(reader)
|
const reader = new StringReader('(word AND word)')
|
||||||
|
const tokenizer = new Tokenizer(reader)
|
||||||
|
const tokens = tokenizer.read()
|
||||||
|
|
||||||
let token = tokenizer.consume()
|
expect(tokens[0].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[0].value).toBe('(')
|
||||||
expect(token.value).toBe('(')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[1].token).toBe(Token.word)
|
||||||
expect(token.token).toBe(Token.word)
|
expect(tokens[1].value).toBe('word')
|
||||||
expect(token.value).toBe('word')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[2].token).toBe(Token.whitespace)
|
||||||
expect(token.token).toBe(Token.whitespace)
|
expect(tokens[2].value).toBe(' ')
|
||||||
expect(token.value).toBe(' ')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[3].token).toBe(Token.operator)
|
||||||
expect(token.token).toBe(Token.operator)
|
expect(tokens[3].value).toBe('and')
|
||||||
expect(token.value).toBe('and')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[4].token).toBe(Token.whitespace)
|
||||||
expect(token.token).toBe(Token.whitespace)
|
expect(tokens[4].value).toBe(' ')
|
||||||
expect(token.value).toBe(' ')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[5].token).toBe(Token.word)
|
||||||
expect(token.token).toBe(Token.word)
|
expect(tokens[5].value).toBe('word')
|
||||||
expect(token.value).toBe('word')
|
|
||||||
|
|
||||||
token = tokenizer.consume()
|
expect(tokens[6].token).toBe(Token.group)
|
||||||
expect(token.token).toBe(Token.group)
|
expect(tokens[6].value).toBe(')')
|
||||||
expect(token.value).toBe(')')
|
})
|
||||||
|
test('should parse & separator', () => {
|
||||||
|
const reader = new StringReader('(word & word)')
|
||||||
|
const tokenizer = new Tokenizer(reader)
|
||||||
|
const tokens = tokenizer.read()
|
||||||
|
|
||||||
|
expect(tokens[0].token).toBe(Token.group)
|
||||||
|
expect(tokens[0].value).toBe('(')
|
||||||
|
|
||||||
|
expect(tokens[1].token).toBe(Token.word)
|
||||||
|
expect(tokens[1].value).toBe('word')
|
||||||
|
|
||||||
|
expect(tokens[2].token).toBe(Token.whitespace)
|
||||||
|
expect(tokens[2].value).toBe(' ')
|
||||||
|
|
||||||
|
expect(tokens[3].token).toBe(Token.operator)
|
||||||
|
expect(tokens[3].value).toBe('&')
|
||||||
|
|
||||||
|
expect(tokens[4].token).toBe(Token.whitespace)
|
||||||
|
expect(tokens[4].value).toBe(' ')
|
||||||
|
|
||||||
|
expect(tokens[5].token).toBe(Token.word)
|
||||||
|
expect(tokens[5].value).toBe('word')
|
||||||
|
|
||||||
|
expect(tokens[6].token).toBe(Token.group)
|
||||||
|
expect(tokens[6].value).toBe(')')
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -56,19 +56,23 @@ export class Tokenizer implements InputReader<TokenValue> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.isAlphanumeric(nextChar.charCodeAt(0))) {
|
if (this.isAlphanumeric(nextChar.charCodeAt(0))) {
|
||||||
if (nextChar.toUpperCase() === 'O' && this.reader.peek(1) === 'R') {
|
if (this.confirmExactWord('OR')) {
|
||||||
return this.consumeOr()
|
return this.consumeOr()
|
||||||
}
|
}
|
||||||
if (
|
if (this.confirmExactWord('AND')) {
|
||||||
nextChar.toUpperCase() === 'A' &&
|
|
||||||
this.reader.peek(1) === 'N' &&
|
|
||||||
this.reader.peek(2) === 'D'
|
|
||||||
) {
|
|
||||||
return this.consumeAnd()
|
return this.consumeAnd()
|
||||||
}
|
}
|
||||||
return this.consumeWord()
|
return this.consumeWord()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nextChar === '|') {
|
||||||
|
return this.consumeOr()
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextChar === '&') {
|
||||||
|
return this.consumeAnd()
|
||||||
|
}
|
||||||
|
|
||||||
if (nextChar === '(' || nextChar === ')') {
|
if (nextChar === '(' || nextChar === ')') {
|
||||||
// this.state = TokenizerState.inGroup
|
// this.state = TokenizerState.inGroup
|
||||||
return this.consumeGroup()
|
return this.consumeGroup()
|
||||||
@@ -84,24 +88,62 @@ export class Tokenizer implements InputReader<TokenValue> {
|
|||||||
throw new Error('bad state')
|
throw new Error('bad state')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
consumeAnd(): TokenValue {
|
consumeAnd(): TokenValue {
|
||||||
this.reader.consume()
|
let value = ''
|
||||||
this.reader.consume()
|
if (this.confirmExactWord('AND')) {
|
||||||
this.reader.consume()
|
this.consumeExactWord('AND')
|
||||||
|
value = 'and'
|
||||||
|
} else if (this.confirmExactWord('&')) {
|
||||||
|
this.consumeExactWord('&')
|
||||||
|
value = '&'
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
value: 'and',
|
value,
|
||||||
token: Token.operator,
|
token: Token.operator,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
consumeOr(): TokenValue {
|
consumeOr(): TokenValue {
|
||||||
this.reader.consume()
|
let value = ''
|
||||||
this.reader.consume()
|
if (this.confirmExactWord('OR')) {
|
||||||
|
this.consumeExactWord('OR')
|
||||||
|
value = 'or'
|
||||||
|
} else if (this.confirmExactWord('|')) {
|
||||||
|
this.consumeExactWord('|')
|
||||||
|
value = '|'
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
value: 'or',
|
value,
|
||||||
token: Token.operator,
|
token: Token.operator,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
confirmExactWord(word: string) {
|
||||||
|
let nextChar = this.reader.peek()
|
||||||
|
for (let i = 0; i < word.length; i++) {
|
||||||
|
if (nextChar !== word[i]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
nextChar = this.reader.peek(i + 1)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
consumeExactWord(word: string) {
|
||||||
|
if (this.confirmExactWord(word)) {
|
||||||
|
this.consumeReader(word.length)
|
||||||
|
} else {
|
||||||
|
throw new Error("Can't find exact word: " + word)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
consumeReader(times = 1) {
|
||||||
|
for (let i = 0; i < times; i++) {
|
||||||
|
this.reader.consume()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private consumeGroup(): TokenValue {
|
private consumeGroup(): TokenValue {
|
||||||
return {
|
return {
|
||||||
value: this.reader.consume(),
|
value: this.reader.consume(),
|
||||||
@@ -166,10 +208,6 @@ export class Tokenizer implements InputReader<TokenValue> {
|
|||||||
return tokens
|
return tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
// public read() {
|
|
||||||
// throw new Error('Method not implemented.')
|
|
||||||
// }
|
|
||||||
|
|
||||||
private isWhitespace(nextChar: string) {
|
private isWhitespace(nextChar: string) {
|
||||||
return ' \t\n\r'.includes(nextChar)
|
return ' \t\n\r'.includes(nextChar)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user