From 9240e2ede8713706f32ecfde92a06d6cbe56ea6b Mon Sep 17 00:00:00 2001
From: Mahmoud-Emad <mahmmoud.hassanein@gmail.com>
Date: Wed, 15 Oct 2025 16:44:02 +0300
Subject: [PATCH] fix: Improve Docusaurus link generation logic

- Add function to strip numeric prefixes from filenames
- Strip numeric prefixes from links for Docusaurus compatibility
- Fix same-collection relative links
- Convert collection:page links to relative paths
- Remove .md extensions from generated links
---
 lib/web/docusaurus/dsite_generate_docs.v | 114 ++++++++++++++++++++---
 1 file changed, 101 insertions(+), 13 deletions(-)

diff --git a/lib/web/docusaurus/dsite_generate_docs.v b/lib/web/docusaurus/dsite_generate_docs.v
index 930227d8..f656b655 100644
--- a/lib/web/docusaurus/dsite_generate_docs.v
+++ b/lib/web/docusaurus/dsite_generate_docs.v
@@ -159,46 +159,134 @@ fn (mut generator SiteGenerator) section_generate(args_ Section) ! {
 	catfile.write(c)!
 }
 
-// Fix links to account for nested categories in Docusaurus
-// Doctree exports links as ../collection/page.md but Docusaurus may have nested paths
+// Strip a leading "<digits>_" ordering prefix from a filename (e.g., "03_linux_installation" -> "linux_installation").
+// Docusaurus automatically strips these prefixes from URLs; names without such a prefix are returned unchanged.
+fn strip_numeric_prefix(name string) string {
+	// Only names of at least 3 bytes that start with a digit are candidates; shorter names are never altered
+	if name.len > 2 && name[0].is_digit() {
+		for i := 1; i < name.len; i++ {
+			if name[i] == `_` {
+				// Digit run ended at an underscore: return everything after it (empty for a name like "03_")
+				return name[i + 1..]
+			}
+			if !name[i].is_digit() {
+				// A non-digit came before any underscore (e.g. "3d_model"): not a numeric prefix, return as-is
+				return name
+			}
+		}
+	}
+	return name
+}
+
+// Fix links to account for nested categories and Docusaurus URL conventions
 fn (generator SiteGenerator) fix_links(content string) string {
 	mut result := content
 
-	// Build a map of collection name to actual directory path
-	mut collection_paths := map[string]string{}
+	// Build maps for link fixing
+	mut collection_paths := map[string]string{} // collection -> directory path (for nested collections)
+	mut page_to_path := map[string]string{} // page_name -> full directory path in Docusaurus
+	mut collection_page_map := map[string]string{} // "collection:page" -> directory path
+
 	for page in generator.site.pages {
 		parts := page.src.split(':')
 		if parts.len != 2 {
 			continue
 		}
 		collection := parts[0]
+		page_name := parts[1]
 
 		// Extract directory path from page.path
-		// page.path can be like "appendix/internet_today/" or "appendix/internet_today/page.md"
 		mut dir_path := page.path.trim('/')
-
-		// If path ends with a filename, remove it to get just the directory
 		if dir_path.contains('/') && !dir_path.ends_with('/') {
-			// Check if last part looks like a filename (has extension or is a page name)
 			last_part := dir_path.all_after_last('/')
-			if last_part.contains('.') || last_part == parts[1] {
+			if last_part.contains('.') || last_part == page_name {
 				dir_path = dir_path.all_before_last('/')
 			}
 		}
 
-		// If the directory path is different from collection name, store the mapping
-		// This handles nested categories like appendix/internet_today
+		// Store collection -> directory mapping for nested collections
 		if dir_path != collection && dir_path != '' {
 			collection_paths[collection] = dir_path
 		}
+
+		// Store page_name -> directory path for fixing same-collection links
+		// Strip numeric prefix from page_name for the map key
+		clean_page_name := strip_numeric_prefix(page_name)
+		page_to_path[clean_page_name] = dir_path
+
+		// Store collection:page -> directory path for fixing collection:page format links
+		collection_page_map['${collection}:${clean_page_name}'] = dir_path
 	}
 
-	// Replace ../collection/ with ../actual/nested/path/ for nested collections
+	// STEP 1: Strip numeric prefixes from all page references in links FIRST
+	mut lines := result.split('\n')
+	for i, line in lines {
+		if !line.contains('](') {
+			continue
+		}
+
+		mut new_line := line
+		parts := line.split('](')
+		if parts.len < 2 {
+			continue
+		}
+
+		for j := 1; j < parts.len; j++ {
+			close_idx := parts[j].index(')') or { continue }
+			link_url := parts[j][..close_idx]
+
+			mut new_url := link_url
+			if link_url.contains('/') {
+				path_part := link_url.all_before_last('/')
+				file_part := link_url.all_after_last('/')
+				new_file := strip_numeric_prefix(file_part)
+				if new_file != file_part {
+					new_url = '${path_part}/${new_file}'
+				}
+			} else {
+				new_url = strip_numeric_prefix(link_url)
+			}
+
+			if new_url != link_url {
+				new_line = new_line.replace('](${link_url})', '](${new_url})')
+			}
+		}
+		lines[i] = new_line
+	}
+	result = lines.join('\n')
+
+	// STEP 2: Replace ../collection/ with ../actual/nested/path/ for cross-collection links
 	for collection, actual_path in collection_paths {
 		result = result.replace('../${collection}/', '../${actual_path}/')
 	}
 
-	// Remove .md extensions from all links (Docusaurus doesn't use them in URLs)
+	// STEP 3: Fix same-collection links: ./page -> correct path based on Docusaurus structure
+	for page_name, target_dir in page_to_path {
+		old_link := './${page_name}'
+		if result.contains(old_link) && target_dir != '' {
+			new_link := '../${target_dir}/${page_name}'
+			result = result.replace(old_link, new_link)
+		}
+	}
+
+	// STEP 4: Convert collection:page format to proper relative paths
+	// Pattern: collection:page_name -> ../dir/page_name
+	for collection_page, target_dir in collection_page_map {
+		old_pattern := collection_page
+		if result.contains(old_pattern) {
+			// Extract just the page name from "collection:page"
+			page_name := collection_page.all_after(':')
+			mut new_link := ''
+			if target_dir != '' {
+				new_link = '../${target_dir}/${page_name}'
+			} else {
+				new_link = './${page_name}'
+			}
+			result = result.replace(old_pattern, new_link)
+		}
+	}
+
+	// STEP 5: Remove .md extensions from all links (Docusaurus doesn't use them in URLs)
 	result = result.replace('.md)', ')')
 
 	return result