From f2138f104fa83a10fd58ca914c35620405a42d4d Mon Sep 17 00:00:00 2001 From: Mahmoud Emad Date: Mon, 17 Mar 2025 22:46:26 +0200 Subject: [PATCH] feat: Improve Markdown parser list and table detection - Enhance the accuracy of list detection to correctly identify ordered, unordered, and task lists. - Improve table detection by ensuring a valid separator line exists before confirming a table. - Fix a bug in footnote definition detection to handle cases where the closing bracket is missing. --- lib/data/markdownparser2/parser_helpers.v | 57 ++++++++++++----------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/lib/data/markdownparser2/parser_helpers.v b/lib/data/markdownparser2/parser_helpers.v index 9ad1e255..768d92c1 100644 --- a/lib/data/markdownparser2/parser_helpers.v +++ b/lib/data/markdownparser2/parser_helpers.v @@ -21,33 +21,34 @@ fn (p Parser) is_list_start() bool { if p.pos >= p.text.len { return false } - + // Unordered list: *, -, + - if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`) && - (p.peek(1) == ` ` || p.peek(1) == `\t`) { + if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`) + && (p.peek(1) == ` ` || p.peek(1) == `\t`) { return true } - + // Ordered list: 1., 2., etc. if p.pos + 2 < p.text.len && p.text[p.pos].is_digit() { mut i := p.pos + 1 for i < p.text.len && p.text[i].is_digit() { i++ } - if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len && (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) { + if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len + && (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) { return true } } - + // Task list: - [ ], - [x], etc. - if p.pos + 4 < p.text.len && - (p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`) && - p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[` && - (p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`) && - p.text[p.pos + 4] == `]` { + if p.pos + 4 < p.text.len + && (p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`) + && p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[` + && (p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`) + && p.text[p.pos + 4] == `]` { return true } - + return false } @@ -56,7 +57,7 @@ fn (p Parser) is_table_start() bool { if p.pos >= p.text.len || p.text[p.pos] != `|` { return false } - + // Look for a pipe character at the beginning of the line // and check if there's at least one more pipe in the line mut has_second_pipe := false @@ -68,38 +69,39 @@ fn (p Parser) is_table_start() bool { } i++ } - + if !has_second_pipe { return false } - + // Check if the next line has a header separator (---|---|...) mut next_line_start := i + 1 if next_line_start >= p.text.len { return false } - + // Skip whitespace at the beginning of the next line - for next_line_start < p.text.len && (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) { + for next_line_start < p.text.len + && (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) { next_line_start++ } - + if next_line_start >= p.text.len || p.text[next_line_start] != `|` { return false } - + // Check for pattern like |---|---|... // We just need to check if there's a valid separator line mut j := next_line_start + 1 for j < p.text.len && p.text[j] != `\n` { // Only allow -, |, :, space, or tab in the separator line - if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && - p.text[j] != ` ` && p.text[j] != `\t` { + if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && p.text[j] != ` ` + && p.text[j] != `\t` { return false } j++ } - + return true } @@ -108,8 +110,11 @@ fn (p Parser) is_footnote_definition() bool { if p.pos + 3 >= p.text.len { return false } - - // Check for pattern like [^id]: - return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && - p.text[p.pos + 2] != `]` && p.text.index_after(']:', p.pos + 2) > p.pos + 2 + + if idx := p.text.index_after(']:', p.pos + 2) { + return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && p.text[p.pos + 2] != `]` + && idx > p.pos + 2 + } else { + return false + } }