fix(lexer): prevent stall on bad characters

2026-05-17 17:48:09 +00:00 · 2022-08-16 03:08:49 +03:00
parent 188bac3502
commit ee8f8aa3ab
2 changed files with 15 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -51,8 +51,8 @@ const result = [

 **Input:** `(mango banana lemon) OR apple -pineapple`

-**Explanation:** One of the word: "mango", "banana", "lemon" OR "apple"; exclude all results
-containing "pineapple"
+**Explanation:** Either one of the words: "mango", "banana", or "lemon"; OR the word "apple";
+exclude all results containing "pineapple"

 **Output**:

@@ -108,7 +108,8 @@ This is the comprehensive list of operators and their object results:
 - **Word:** `example`

  Any single word. Only alpha-numeric characters, dashes and underscores are considered a word. The
-  rest is ignored.
+  rest is considered whitespace, which is ignored by the parser, but will cause the surrounding
+  tokens to be broken apart.

  **Object:**

--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -110,11 +110,7 @@ export class Lexer implements ILexer {
      case lexerState.default:
        // whitespace
        if (this.isWhitespace(nextChar)) {
-          this.afterWhitespace = true
-          return {
-            value: this.reader.consume(),
-            token: LexerToken.whitespace,
-          }
+          return this.consumeWhitespace()
        }

        // quote
@@ -154,8 +150,8 @@ export class Lexer implements ILexer {
          return this.consumeGroup()
        }

-        // other, consume normally
-        return this.consumeWord()
+        // other, consider as whitespace
+        return this.consumeWhitespace()
      case lexerState.inPhrase:
        this.afterWhitespace = false

@@ -172,6 +168,14 @@ export class Lexer implements ILexer {
    }
  }

+  private consumeWhitespace() {
+    this.afterWhitespace = true
+    return {
+      value: this.reader.consume(),
+      token: LexerToken.whitespace,
+    }
+  }
+
  private consumeQuote(): LexerTokenValue {
    return {
      value: this.reader.consume(),