feat: Improve Markdown parser list and table detection

- Enhance the accuracy of list detection to correctly identify
  ordered, unordered, and task lists.
- Improve table detection by ensuring a valid separator line
  exists before confirming a table.
- Fix a bug in footnote definition detection to handle cases
  where the closing `]:` sequence is missing.

This commit is contained in:
Mahmoud Emad
2025-03-17 22:46:26 +02:00
parent 04ee73e8dd
commit f2138f104f

View File

@@ -21,33 +21,34 @@ fn (p Parser) is_list_start() bool {
if p.pos >= p.text.len { if p.pos >= p.text.len {
return false return false
} }
// Unordered list: *, -, + // Unordered list: *, -, +
if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`) && if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`)
(p.peek(1) == ` ` || p.peek(1) == `\t`) { && (p.peek(1) == ` ` || p.peek(1) == `\t`) {
return true return true
} }
// Ordered list: 1., 2., etc. // Ordered list: 1., 2., etc.
if p.pos + 2 < p.text.len && p.text[p.pos].is_digit() { if p.pos + 2 < p.text.len && p.text[p.pos].is_digit() {
mut i := p.pos + 1 mut i := p.pos + 1
for i < p.text.len && p.text[i].is_digit() { for i < p.text.len && p.text[i].is_digit() {
i++ i++
} }
if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len && (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) { if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len
&& (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) {
return true return true
} }
} }
// Task list: - [ ], - [x], etc. // Task list: - [ ], - [x], etc.
if p.pos + 4 < p.text.len && if p.pos + 4 < p.text.len
(p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`) && && (p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`)
p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[` && && p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[`
(p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`) && && (p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`)
p.text[p.pos + 4] == `]` { && p.text[p.pos + 4] == `]` {
return true return true
} }
return false return false
} }
@@ -56,7 +57,7 @@ fn (p Parser) is_table_start() bool {
if p.pos >= p.text.len || p.text[p.pos] != `|` { if p.pos >= p.text.len || p.text[p.pos] != `|` {
return false return false
} }
// Look for a pipe character at the beginning of the line // Look for a pipe character at the beginning of the line
// and check if there's at least one more pipe in the line // and check if there's at least one more pipe in the line
mut has_second_pipe := false mut has_second_pipe := false
@@ -68,38 +69,39 @@ fn (p Parser) is_table_start() bool {
} }
i++ i++
} }
if !has_second_pipe { if !has_second_pipe {
return false return false
} }
// Check if the next line has a header separator (---|---|...) // Check if the next line has a header separator (---|---|...)
mut next_line_start := i + 1 mut next_line_start := i + 1
if next_line_start >= p.text.len { if next_line_start >= p.text.len {
return false return false
} }
// Skip whitespace at the beginning of the next line // Skip whitespace at the beginning of the next line
for next_line_start < p.text.len && (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) { for next_line_start < p.text.len
&& (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) {
next_line_start++ next_line_start++
} }
if next_line_start >= p.text.len || p.text[next_line_start] != `|` { if next_line_start >= p.text.len || p.text[next_line_start] != `|` {
return false return false
} }
// Check for pattern like |---|---|... // Check for pattern like |---|---|...
// We just need to check if there's a valid separator line // We just need to check if there's a valid separator line
mut j := next_line_start + 1 mut j := next_line_start + 1
for j < p.text.len && p.text[j] != `\n` { for j < p.text.len && p.text[j] != `\n` {
// Only allow -, |, :, space, or tab in the separator line // Only allow -, |, :, space, or tab in the separator line
if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && p.text[j] != ` `
p.text[j] != ` ` && p.text[j] != `\t` { && p.text[j] != `\t` {
return false return false
} }
j++ j++
} }
return true return true
} }
@@ -108,8 +110,11 @@ fn (p Parser) is_footnote_definition() bool {
if p.pos + 3 >= p.text.len { if p.pos + 3 >= p.text.len {
return false return false
} }
// Check for pattern like [^id]: if idx := p.text.index_after(']:', p.pos + 2) {
return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && p.text[p.pos + 2] != `]`
p.text[p.pos + 2] != `]` && p.text.index_after(']:', p.pos + 2) > p.pos + 2 && idx > p.pos + 2
} else {
return false
}
} }