fix(lexer): use regex for alphanumeric match (support multilingual)

docs: update readme
2026-05-17 17:48:09 +00:00 · 2022-08-16 02:54:58 +03:00
parent a1f60274ea
commit 188bac3502
2 changed files with 49 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ package user (you). It only returns the logical tree of search.
 - [Example results](#example-results)
 - [How to use](#how-to-use)
 - [Supported operators](#supported-operators)
+  - [Implemented](#implemented)
+  - [To Do](#to-do)

 </details>
 <!-- /toc -->
@@ -101,6 +103,8 @@ const results = parse('(mango banana lemon) OR apple -pineapple')

 This is the comprehensive list of operators and their object results:

+### Implemented
+
 - **Word:** `example`

  Any single word. Only alpha-numeric characters, dashes and underscores are considered a word. The
@@ -184,7 +188,9 @@ This is the comprehensive list of operators and their object results:
  }
  ```

- Exclusion: `-example`
+### To Do
+
+- **Exclusion:** `-example`

  An exclusion is an indication to not include results using the given word, phrase or group.

@@ -199,3 +205,35 @@ This is the comprehensive list of operators and their object results:
    }
  }
  ```
+
+- **Domain:** `example-domain:example-token`
+
+  A domain prefix restricts the token that follows it to the named domain. For example, a user
+  could search `name:apple` to look for the word `apple` only within the `name` property of the
+  objects being searched.
+
+  **Object:**
+
+  ```js
+  {
+    type: 'domain',
+    domain: 'example-domain',
+    value: { // any type of child node, such as word, phrase, etc.
+      type: 'word',
+      value: 'example-token',
+    }
+  }
+  ```
+
+- **User:** `@example-user`
+
+  A user query restricts the search to content from a specific user.
+
+  **Object:**
+
+  ```js
+  {
+    type: 'user',
+    value: 'example-user',
+  }
+  ```
--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -274,11 +274,15 @@ export class Lexer implements ILexer {
  }

  private isAlphanumeric(char: string): boolean {
-    const charCode = char.charCodeAt(0)
-    return (
-      (charCode >= 48 && charCode <= 57) ||
-      (charCode >= 65 && charCode <= 90) ||
-      (charCode >= 97 && charCode <= 122)
-    )
+    // Reports whether `char` is a single word character: a letter, combining
+    // mark or number from any script, or a dash or underscore.
+    //
+    // `\w` and `\d` are ASCII-only in JavaScript (even with the `u` flag), so
+    // multilingual input needs Unicode property escapes instead:
+    // \p{L} = letters, \p{M} = combining marks, \p{N} = numbers.
+    //
+    // The `u` flag makes the pattern match exactly one code point; the empty
+    // string and multi-character strings return false.
+    return /^[\p{L}\p{M}\p{N}_-]$/u.test(char)
  }
 }