feat(markdown)!: switch to split parser (#3048)

* switch to split markdown parser with separate block and inline parsers to improve performance
* add exclude_children! directive (useful for issues such as #2212, "Injected markdown incorrectly highlights indented docstrings")
* split markdown queries into block and inline ones and add the injection for inline into block grammar
* add include_dir option to parser configs (needed because the two grammars don't live in the repo's root directory)

BREAKING CHANGE: downstream queries need to be adapted to new parser
This commit is contained in:
Matthias Deiml
2022-06-26 18:02:29 +02:00
committed by GitHub
parent d810c38634
commit 002084b1be
7 changed files with 121 additions and 61 deletions

View File

@@ -192,7 +192,10 @@
"revision": "a4b9187417d6be349ee5fd4b6e77b4172c6827dd"
},
"markdown": {
"revision": "be3e08acfd85bd87d85f41fde74fdcec25f76dbe"
"revision": "acb097808683eea7c6d6d469644a275b9f6a64f5"
},
"markdown_inline": {
"revision": "acb097808683eea7c6d6d469644a275b9f6a64f5"
},
"ninja": {
"revision": "0a95cfdc0745b6ae82f60d3a339b37f19b7b9267"

View File

@@ -528,9 +528,22 @@ list.hcl = {
-- Parser entry for the `markdown` grammar.
list.markdown = {
-- Where and how to fetch the grammar sources.
install_info = {
url = "https://github.com/MDeiml/tree-sitter-markdown",
-- presumably the subdirectory of the repository that holds this grammar
-- (the grammars do not live in the repository root) -- TODO confirm
location = "tree-sitter-markdown",
files = { "src/parser.c", "src/scanner.cc" },
-- NOTE(review): `branch` is assigned twice in this constructor ("main" and
-- "split_parser"). This looks like the removed and the added line of a diff
-- shown together; confirm that only "split_parser" is intended.
branch = "main",
branch = "split_parser",
},
maintainers = { "@MDeiml" },
-- Flagged as experimental.
experimental = true,
}
-- Parser entry for the `markdown_inline` grammar. It is fetched from the same
-- repository and branch as `markdown`, but from its own `location`.
list.markdown_inline = {
  experimental = true,
  maintainers = { "@MDeiml" },
  -- Where and how to fetch the grammar sources.
  install_info = {
    branch = "split_parser",
    files = {
      "src/parser.c",
      "src/scanner.cc",
    },
    location = "tree-sitter-markdown-inline",
    url = "https://github.com/MDeiml/tree-sitter-markdown",
  },
}

View File

@@ -128,3 +128,28 @@ query.add_directive("downcase!", function(match, _, bufnr, pred, metadata)
metadata[key] = string.lower(text)
end
end)
-- Directive `(#exclude_children! @capture)`.
--
-- Computes the parts of the captured node that are NOT covered by any of its
-- named children and stores them in `metadata.content` as a list of
-- `{ start_row, start_col, end_row, end_col }` ranges (the values returned by
-- `node:range()`).
--
-- Parameters used:
--   match    table indexed by capture id; `match[pred[2]]` is the captured node
--   pred     the directive's arguments; `pred[2]` is the capture id
--   metadata table the result is written to (`metadata.content`)
--
-- If the capture is absent from the match, the directive does nothing and
-- leaves `metadata` untouched instead of raising on a nil node.
query.add_directive("exclude_children!", function(match, _pattern, _bufnr, pred, metadata)
  local capture_id = pred[2]
  local node = match[capture_id]
  if not node then
    -- The capture did not take part in this match; there is nothing to split.
    return
  end

  -- (start_row, start_col) is a moving cursor: the position right after the
  -- last child processed so far (initially the start of the node itself).
  local start_row, start_col, end_row, end_col = node:range()
  local ranges = {}

  for i = 0, node:named_child_count() - 1 do
    local child = node:named_child(i)
    local child_start_row, child_start_col, child_end_row, child_end_col = child:range()

    -- Emit the gap between the cursor and this child, unless it is empty.
    if child_start_row > start_row or child_start_col > start_col then
      ranges[#ranges + 1] = {
        start_row,
        start_col,
        child_start_row,
        child_start_col,
      }
    end

    -- Skip over the child.
    start_row = child_end_row
    start_col = child_end_col
  end

  -- Emit whatever remains between the last child and the end of the node.
  if end_row > start_row or end_col > start_col then
    ranges[#ranges + 1] = { start_row, start_col, end_row, end_col }
  end

  metadata.content = ranges
end)

View File

@@ -1,6 +1,6 @@
;; From MDeiml/tree-sitter-markdown
(atx_heading (heading_content) @text.title)
(setext_heading (heading_content) @text.title)
;From MDeiml/tree-sitter-markdown
(atx_heading (inline) @text.title)
(setext_heading (paragraph) @text.title)
[
(atx_h1_marker)
@@ -14,33 +14,23 @@
] @punctuation.special
[
(code_span)
(link_title)
(indented_code_block)
(fenced_code_block)
] @text.literal
[
(emphasis_delimiter)
(code_span_delimiter)
(fenced_code_block_delimiter)
] @punctuation.delimiter
(code_fence_content) @none
(emphasis) @text.emphasis
(strong_emphasis) @text.strong
[
(link_destination)
(uri_autolink)
] @text.uri
[
(link_label)
(link_text)
(image_description)
] @text.reference
[
@@ -52,56 +42,17 @@
(thematic_break)
] @punctuation.special
(block_quote_marker) @punctuation.special
[
(block_continuation)
(block_quote_marker)
] @punctuation.special
[
(backslash_escape)
(hard_line_break)
] @string.escape
(image "!" @punctuation.delimiter)
(image "[" @punctuation.delimiter)
(image "]" @punctuation.delimiter)
(image "(" @punctuation.delimiter)
; (image ")" @punctuation.delimiter)
(inline_link "[" @punctuation.delimiter)
(inline_link "]" @punctuation.delimiter)
(inline_link "(" @punctuation.delimiter)
; (inline_link ")" @punctuation.delimiter)
(shortcut_link "[" @punctuation.delimiter)
(shortcut_link "]" @punctuation.delimiter)
([
(info_string)
(fenced_code_block_delimiter)
(code_span_delimiter)
(emphasis_delimiter)
] @conceal
(#set! conceal ""))
; Conceal inline links
(inline_link
[
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))
; Conceal image links
(image
[
"!"
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))

View File

@@ -1,9 +1,9 @@
(fenced_code_block
(info_string
(language) @language)
(code_fence_content) @content)
(code_fence_content) @content (#exclude_children! @content))
((html_block) @html)
((html_tag) @html)
(document . (thematic_break) (_) @yaml @combined (thematic_break))
(document . (section . (thematic_break) (_) @yaml @combined (thematic_break)))
((inline) @markdown_inline (#exclude_children! @markdown_inline))

View File

@@ -0,0 +1,67 @@
;; From MDeiml/tree-sitter-markdown
;; NOTE(review): presumably the highlight queries for the inline grammar
;; (all node names below are inline-level constructs) -- confirm file name.

; Literal text: code spans and link titles
[
(code_span)
(link_title)
] @text.literal

; Delimiters of emphasis and code spans
[
(emphasis_delimiter)
(code_span_delimiter)
] @punctuation.delimiter

; Emphasis and strong emphasis
(emphasis) @text.emphasis
(strong_emphasis) @text.strong

; Link targets and autolinks
[
(link_destination)
(uri_autolink)
] @text.uri

; Link labels, link text and image descriptions
[
(link_label)
(link_text)
(image_description)
] @text.reference

; Backslash escapes and hard line breaks
[
(backslash_escape)
(hard_line_break)
] @string.escape

; ")" is deliberately not captured as @punctuation.delimiter in the patterns
; below (although "(" is) because of
; https://github.com/nvim-treesitter/nvim-treesitter/issues/2206
; TODO: Find better fix for this
(image ["!" "[" "]" "("] @punctuation.delimiter)
(inline_link ["[" "]" "("] @punctuation.delimiter)
(shortcut_link ["[" "]"] @punctuation.delimiter)

; Conceal code span and emphasis delimiters (replaced by the empty string)
([
(code_span_delimiter)
(emphasis_delimiter)
] @conceal
(#set! conceal ""))

; Conceal inline links
(inline_link
[
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))

; Conceal image links
(image
[
"!"
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))

View File

@@ -0,0 +1 @@
((html_tag) @html)