fix(lexer): use regex for alphanumeric match (support multilingual)

docs: update readme
This commit is contained in:
Chen Asraf
2022-08-16 02:54:58 +03:00
parent a1f60274ea
commit 188bac3502
2 changed files with 49 additions and 7 deletions

View File

@@ -15,6 +15,8 @@ package user (you). It only returns the logical tree of search.
- [Example results](#example-results)
- [How to use](#how-to-use)
- [Supported operators](#supported-operators)
- [Implemented](#implemented)
- [To Do](#to-do)
</details>
<!-- /toc -->
@@ -101,6 +103,8 @@ const results = parse('(mango banana lemon) OR apple -pineapple')
This is the comprehensive list of operators and their object results:
### Implemented
- **Word:** `example`
Any single word. Only alpha-numeric characters, dashes and underscores are considered a word. The
@@ -184,7 +188,9 @@ This is the comprehensive list of operators and their object results:
}
```
- Exclusion: `-example`
### To Do
- **Exclusion:** `-example`
An exclusion indicates that results matching the given word, phrase or group should be left out.
@@ -199,3 +205,35 @@ This is the comprehensive list of operators and their object results:
}
}
```
- **Domain:** `example-domain:example-token`
A domain prefix restricts the token that follows it to the named domain. For example, a
user could search `name:apple` to look for the word `apple` only within the `name` property of
the objects being searched.
**Object:**
```js
{
type: 'domain',
domain: 'example-domain',
value: { // all types of children nodes such as word, phrase, etc
type: 'word',
value: 'example-token',
}
}
```
- **User:** `@example-user`
A user query restricts the search to content from a specific user.
**Object:**
```js
{
type: 'user',
value: 'example-user',
}
```

View File

@@ -274,11 +274,15 @@ export class Lexer implements ILexer {
}
/**
 * Reports whether `char` is a single character that may appear inside a word.
 *
 * Accepted characters are letters, combining marks and digits from any
 * script, plus the dash (`-`) and underscore (`_`).
 *
 * @param char - The character to test. The empty string and strings holding
 *   more than one code point are rejected.
 * @returns `true` when `char` is exactly one word character, otherwise `false`.
 */
private isAlphanumeric(char: string): boolean {
  // `\w` and `\d` only cover ASCII in JavaScript, so Unicode property escapes
  // (which require the `u` flag) are used to accept non-Latin scripts as well.
  // `\p{M}` keeps combining marks (e.g. Devanagari vowel signs) inside a word.
  return /^[\p{L}\p{M}\p{N}_-]$/u.test(char)
}
}