feat: Improve Markdown parser list and table detection

- Enhance the accuracy of list detection to correctly identify ordered, unordered, and task lists.
- Improve table detection by ensuring a valid separator line exists before confirming a table.
- Fix a bug in footnote definition detection to handle cases where the closing `]:` is missing.
2025-03-17 22:46:26 +02:00
parent 04ee73e8dd
commit f2138f104f
1 changed file with 31 additions and 26 deletions
--- a/lib/data/markdownparser2/parser_helpers.v
+++ b/lib/data/markdownparser2/parser_helpers.v
@@ -21,33 +21,34 @@ fn (p Parser) is_list_start() bool {
 	if p.pos >= p.text.len {
 		return false
 	}
-	
+
 	// Unordered list: *, -, +
-	if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`) && 
+	if (p.text[p.pos] == `*` || p.text[p.pos] == `-` || p.text[p.pos] == `+`)
-	   (p.peek(1) == ` ` || p.peek(1) == `\t`) {
+		&& (p.peek(1) == ` ` || p.peek(1) == `\t`) {
 		return true
 	}
-	
+
 	// Ordered list: 1., 2., etc.
 	if p.pos + 2 < p.text.len && p.text[p.pos].is_digit() {
 		mut i := p.pos + 1
 		for i < p.text.len && p.text[i].is_digit() {
 			i++
 		}
-		if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len && (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) {
+		if i < p.text.len && p.text[i] == `.` && i + 1 < p.text.len
 			&& (p.text[i + 1] == ` ` || p.text[i + 1] == `\t`) {
 			return true
 		}
 	}
-	
+
 	// Task list: - [ ], - [x], etc.
-	if p.pos + 4 < p.text.len && 
+	if p.pos + 4 < p.text.len
-	   (p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`) && 
+		&& (p.text[p.pos] == `-` || p.text[p.pos] == `*` || p.text[p.pos] == `+`)
-	   p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[` && 
+		&& p.text[p.pos + 1] == ` ` && p.text[p.pos + 2] == `[`
-	   (p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`) && 
+		&& (p.text[p.pos + 3] == ` ` || p.text[p.pos + 3] == `x` || p.text[p.pos + 3] == `X`)
-	   p.text[p.pos + 4] == `]` {
+		&& p.text[p.pos + 4] == `]` {
 		return true
 	}
-	
+
 	return false
 }
@@ -56,7 +57,7 @@ fn (p Parser) is_table_start() bool {
 	if p.pos >= p.text.len || p.text[p.pos] != `|` {
 		return false
 	}
-	
+
 	// Look for a pipe character at the beginning of the line
 	// and check if there's at least one more pipe in the line
 	mut has_second_pipe := false
@@ -68,38 +69,39 @@ fn (p Parser) is_table_start() bool {
 		}
 		i++
 	}
-	
+
 	if !has_second_pipe {
 		return false
 	}
-	
+
 	// Check if the next line has a header separator (---|---|...)
 	mut next_line_start := i + 1
 	if next_line_start >= p.text.len {
 		return false
 	}
-	
+
 	// Skip whitespace at the beginning of the next line
-	for next_line_start < p.text.len && (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) {
+	for next_line_start < p.text.len
 		&& (p.text[next_line_start] == ` ` || p.text[next_line_start] == `\t`) {
 		next_line_start++
 	}
-	
+
 	if next_line_start >= p.text.len || p.text[next_line_start] != `|` {
 		return false
 	}
-	
+
 	// Check for pattern like |---|---|...
 	// We just need to check if there's a valid separator line
 	mut j := next_line_start + 1
 	for j < p.text.len && p.text[j] != `\n` {
 		// Only allow -, |, :, space, or tab in the separator line
-		if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && 
+		if p.text[j] != `-` && p.text[j] != `|` && p.text[j] != `:` && p.text[j] != ` `
-		   p.text[j] != ` ` && p.text[j] != `\t` {
+			&& p.text[j] != `\t` {
 			return false
 		}
 		j++
 	}
-	
+
 	return true
 }
@@ -108,8 +110,11 @@ fn (p Parser) is_footnote_definition() bool {
 	if p.pos + 3 >= p.text.len {
 		return false
 	}
-	
+
-	// Check for pattern like [^id]:
+	if idx := p.text.index_after(']:', p.pos + 2) {
-	return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && 
+		return p.text[p.pos] == `[` && p.text[p.pos + 1] == `^` && p.text[p.pos + 2] != `]`
-	       p.text[p.pos + 2] != `]` && p.text.index_after(']:', p.pos + 2) > p.pos + 2
+			&& idx > p.pos + 2
 	} else {
 		return false
 	}
 }