diff --git a/README.md b/README.md
index fbaa716..f06d9a1 100644
--- a/README.md
+++ b/README.md
@@ -51,8 +51,8 @@ const result = [
 
 **Input:** `(mango banana lemon) OR apple -pineapple`
 
-**Explanation:** One of the word: "mango", "banana", "lemon" OR "apple"; exclude all results
-containing "pineapple"
+**Explanation:** Either one of the words: "mango", "banana", or "lemon"; OR the word "apple";
+exclude all results containing "pineapple"
 
 **Output**:
 
@@ -108,7 +108,8 @@ This is the comprehensive list of operators and their object results:
 - **Word:** `example`
 
   Any single word. Only alpha-numeric characters, dashes and underscores are considered a word. The
-  rest is ignored.
+  rest is considered whitespace, which is ignored by the parser, but will cause the surrounding
+  tokens to be broken apart.
 
   **Object:**
 
diff --git a/src/lexer.ts b/src/lexer.ts
index 7abc844..56855c7 100644
--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -110,11 +110,7 @@ export class Lexer implements ILexer {
       case lexerState.default:
         // whitespace
         if (this.isWhitespace(nextChar)) {
-          this.afterWhitespace = true
-          return {
-            value: this.reader.consume(),
-            token: LexerToken.whitespace,
-          }
+          return this.consumeWhitespace()
         }
 
         // quote
@@ -154,8 +150,8 @@ export class Lexer implements ILexer {
           return this.consumeGroup()
         }
 
-        // other, consume normally
-        return this.consumeWord()
+        // other, consider as whitespace
+        return this.consumeWhitespace()
 
       case lexerState.inPhrase:
         this.afterWhitespace = false
@@ -172,6 +168,18 @@ export class Lexer implements ILexer {
     }
   }
 
+  /**
+   * Consumes the next character from the reader and returns it as a whitespace
+   * token, setting `afterWhitespace` so later lexing knows whitespace preceded it.
+   */
+  private consumeWhitespace(): LexerTokenValue {
+    this.afterWhitespace = true
+    return {
+      value: this.reader.consume(),
+      token: LexerToken.whitespace,
+    }
+  }
+
   private consumeQuote(): LexerTokenValue {
     return {
       value: this.reader.consume(),