fix(lexer): stall on bad characters

2026-05-18 01:49:07 +00:00 · 2022-08-16 03:08:49 +03:00
parent 188bac3502
commit ee8f8aa3ab
2 changed files with 15 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -51,8 +51,8 @@ const result = [
 **Input:** `(mango banana lemon) OR apple -pineapple`
-**Explanation:** One of the word: "mango", "banana", "lemon" OR "apple"; exclude all results
+**Explanation:** Either one of the words: "mango", "banana", or "lemon"; OR the word "apple";
-containing "pineapple"
+exclude all results containing "pineapple"
 **Output**:
@@ -108,7 +108,8 @@ This is the comprehensive list of operators and their object results:
 - **Word:** `example`
  Any single word. Only alphanumeric characters, dashes, and underscores are considered part of a word. The
-  rest is ignored.
+  rest is considered whitespace, which is ignored by the parser, but will cause the surrounding
  tokens to be broken apart.
  **Object:**
--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -110,11 +110,7 @@ export class Lexer implements ILexer {
      case lexerState.default:
        // whitespace
        if (this.isWhitespace(nextChar)) {
-          this.afterWhitespace = true
+          return this.consumeWhitespace()
          return {
            value: this.reader.consume(),
            token: LexerToken.whitespace,
          }
        }
        // quote
@@ -154,8 +150,8 @@ export class Lexer implements ILexer {
          return this.consumeGroup()
        }
-        // other, consume normally
+        // other, consider as whitespace
-        return this.consumeWord()
+        return this.consumeWhitespace()
      case lexerState.inPhrase:
        this.afterWhitespace = false
@@ -172,6 +168,14 @@ export class Lexer implements ILexer {
    }
  }
  /**
   * Consumes one character from the reader and reports it as a whitespace token.
   *
   * In the default lexer state this is used for actual whitespace and also as the
   * fallback for characters that match no other token kind, so such characters
   * are consumed (advancing the reader) and surface as whitespace.
   *
   * @returns The consumed character tagged with `LexerToken.whitespace`.
   */
  private consumeWhitespace(): LexerTokenValue {
    // Flag that a whitespace token was just produced before handing it back.
    this.afterWhitespace = true
    return {
      value: this.reader.consume(),
      token: LexerToken.whitespace,
    }
  }
  private consumeQuote(): LexerTokenValue {
    return {
      value: this.reader.consume(),