From f4711681dc31392d2a79d4a264be71960682ee9c Mon Sep 17 00:00:00 2001
From: despiegk
Date: Thu, 16 Oct 2025 10:12:02 +0400
Subject: [PATCH] ...

---
 lib/data/atlas/atlas.v      |  14 +++
 lib/data/atlas/atlas_test.v | 173 +++++++++++++++++++++++++++
 lib/data/atlas/collection.v |  23 ++++
 lib/data/atlas/export.v     |  11 +-
 lib/data/atlas/link.v       | 228 ++++++++++++++++++++++++++++++++++++
 lib/data/atlas/readme.md    | 115 ++++++++++++++++++
 6 files changed, 560 insertions(+), 4 deletions(-)
 create mode 100644 lib/data/atlas/link.v

diff --git a/lib/data/atlas/atlas.v b/lib/data/atlas/atlas.v
index c3dfd570..974c4b45 100644
--- a/lib/data/atlas/atlas.v
+++ b/lib/data/atlas/atlas.v
@@ -99,3 +99,17 @@ pub fn (a Atlas) get_collection(name string) !&Collection {
 		}
 	}
 }
+
+// validate_links runs link validation on every collection; broken links are recorded as collection errors
+pub fn (mut a Atlas) validate_links() ! {
+	for _, mut col in a.collections {
+		col.validate_links()!
+	}
+}
+
+// fix_links rewrites same-collection page links in the page files of every collection
+pub fn (mut a Atlas) fix_links() ! {
+	for _, mut col in a.collections {
+		col.fix_links()!
+	}
+}
diff --git a/lib/data/atlas/atlas_test.v b/lib/data/atlas/atlas_test.v
index 8bac83fb..63f16472 100644
--- a/lib/data/atlas/atlas_test.v
+++ b/lib/data/atlas/atlas_test.v
@@ -168,4 +168,177 @@ fn test_error_hash() {
 	}
 
 	assert err1.hash() == err2.hash()
+}
+
+fn test_find_links() {
+	content := '
+# Test Page
+
+[Link 1](page1)
+[Link 2](guides:intro)
+[Link 3](/path/to/page2)
+[External](https://example.com)
+[Anchor](#section)
+'
+
+	links := find_links(content)
+
+	// The https:// and #anchor links are not local, which leaves 3 local links
+	local_links := links.filter(it.is_local)
+	assert local_links.len == 3
+
+	// Check collection:page format
+	link2 := local_links[1]
+	assert link2.collection == 'guides'
+	assert link2.page == 'intro'
+
+	// Check path-based link (only filename used, no collection set)
+	link3 := local_links[2]
+	assert link3.page == 'page2'
+	assert link3.collection == ''
+}
+
+fn test_validate_links() {
+	// Setup: a collection directory with a .collection marker file
+	col_path := '${test_base}/link_test'
+	os.mkdir_all(col_path)!
+
+	mut cfile := pathlib.get_file(path: '${col_path}/.collection', create: true)!
+	cfile.write('name:test_col')!
+
+	// Create page1 with valid link
+	mut page1 := pathlib.get_file(path: '${col_path}/page1.md', create: true)!
+	page1.write('[Link to page2](page2)')!
+
+	// Create page2 (target exists)
+	mut page2 := pathlib.get_file(path: '${col_path}/page2.md', create: true)!
+	page2.write('# Page 2')!
+
+	mut a := new()!
+	a.add_collection(name: 'test_col', path: col_path)!
+
+	// Validate
+	a.validate_links()!
+
+	// Should have no errors
+	col := a.get_collection('test_col')!
+	assert col.errors.len == 0
+}
+
+fn test_validate_broken_links() {
+	// Setup: a collection directory with a .collection marker file
+	col_path := '${test_base}/broken_link_test'
+	os.mkdir_all(col_path)!
+
+	mut cfile := pathlib.get_file(path: '${col_path}/.collection', create: true)!
+	cfile.write('name:test_col')!
+
+	// Create page with broken link (no page named `nonexistent` is created)
+	mut page1 := pathlib.get_file(path: '${col_path}/page1.md', create: true)!
+	page1.write('[Broken link](nonexistent)')!
+
+	mut a := new()!
+	a.add_collection(name: 'test_col', path: col_path)!
+
+	// Validate
+	a.validate_links()!
+
+	// Exactly one error is expected, categorised as an invalid page reference
+	col := a.get_collection('test_col')!
+	assert col.errors.len == 1
+	assert col.errors[0].category == .invalid_page_reference
+}
+
+fn test_fix_links() {
+	// Setup - all pages in same directory for simpler test
+	col_path := '${test_base}/fix_link_test'
+	os.mkdir_all(col_path)!
+
+	mut cfile := pathlib.get_file(path: '${col_path}/.collection', create: true)!
+	cfile.write('name:test_col')!
+
+	// Create pages in same directory
+	mut page1 := pathlib.get_file(path: '${col_path}/page1.md', create: true)!
+	page1.write('[Link](page2)')!
+
+	mut page2 := pathlib.get_file(path: '${col_path}/page2.md', create: true)!
+	page2.write('# Page 2')!
+
+	mut a := new()!
+	a.add_collection(name: 'test_col', path: col_path)!
+
+	// Get the page and test fix_links directly
+	mut col := a.get_collection('test_col')!
+	mut p := col.page_get('page1')!
+
+	original := p.read_content()!
+	println('Original: ${original}')
+
+	fixed := p.fix_links(original)!
+	println('Fixed: ${fixed}')
+
+	// A same-directory target is rewritten to its bare file name with the .md extension
+	assert fixed.contains('[Link](page2.md)')
+}
+
+fn test_link_formats() {
+	content := '
+[Same collection](page1)
+[With extension](page2.md)
+[Collection ref](guides:intro)
+[Path based](/some/path/page3)
+[Relative path](../other/page4.md)
+'
+
+	links := find_links(content)
+	local_links := links.filter(it.is_local)
+
+	assert local_links.len == 5
+
+	// Check normalization (.md stripped, directories dropped)
+	assert local_links[0].page == 'page1'
+	assert local_links[1].page == 'page2'
+	assert local_links[2].collection == 'guides'
+	assert local_links[2].page == 'intro'
+	assert local_links[3].page == 'page3' // Path ignored, only filename
+	assert local_links[4].page == 'page4' // Path ignored, only filename
+}
+
+fn test_cross_collection_links() {
+	// Setup two collections
+	col1_path := '${test_base}/col1_cross'
+	col2_path := '${test_base}/col2_cross'
+
+	os.mkdir_all(col1_path)!
+	os.mkdir_all(col2_path)!
+
+	mut cfile1 := pathlib.get_file(path: '${col1_path}/.collection', create: true)!
+	cfile1.write('name:col1')!
+
+	mut cfile2 := pathlib.get_file(path: '${col2_path}/.collection', create: true)!
+	cfile2.write('name:col2')!
+
+	// Page in col1 links to col2
+	mut page1 := pathlib.get_file(path: '${col1_path}/page1.md', create: true)!
+	page1.write('[Link to col2](col2:page2)')!
+
+	// Page in col2
+	mut page2 := pathlib.get_file(path: '${col2_path}/page2.md', create: true)!
+	page2.write('# Page 2')!
+
+	mut a := new()!
+	a.add_collection(name: 'col1', path: col1_path)!
+	a.add_collection(name: 'col2', path: col2_path)!
+
+	// Validate - should pass because col2:page2 exists
+	a.validate_links()!
+
+	col1 := a.get_collection('col1')!
+	assert col1.errors.len == 0
+
+	// Fix links - cross-collection links should NOT be rewritten
+	a.fix_links()!
+
+	fixed := page1.read()!
+	assert fixed.contains('[Link to col2](col2:page2)') // Unchanged
 }
\ No newline at end of file
diff --git a/lib/data/atlas/collection.v b/lib/data/atlas/collection.v
index d10eab0e..cebf3b9a 100644
--- a/lib/data/atlas/collection.v
+++ b/lib/data/atlas/collection.v
@@ -295,3 +295,26 @@ pub fn (c Collection) print_errors() {
 		console.print_stderr(' ${err.str()}')
 	}
 }
+
+// validate_links runs link validation on every page of the collection
+pub fn (mut c Collection) validate_links() ! {
+	for _, mut page in c.pages {
+		page.validate_links()!
+	}
+}
+
+// fix_links fixes the links of every page and rewrites the page file when its content changed
+pub fn (mut c Collection) fix_links() ! {
+	for _, mut page in c.pages {
+		// Read content. NOTE(review): if read_content() expands !!include actions, the expanded text is what gets written back below - confirm
+		content := page.read_content()!
+
+		// Fix links
+		fixed_content := page.fix_links(content)!
+
+		// Write back only if changed, so untouched pages are not rewritten
+		if fixed_content != content {
+			page.path.write(fixed_content)!
+		}
+	}
+}
diff --git a/lib/data/atlas/export.v b/lib/data/atlas/export.v
index 7e5b1bbf..82e87109 100644
--- a/lib/data/atlas/export.v
+++ b/lib/data/atlas/export.v
@@ -19,6 +19,9 @@ pub fn (mut a Atlas) export(args ExportArgs) ! {
 		dest.empty()!
 	}
 
+	// Validate links first so broken links show up in the per-collection error report below
+	a.validate_links()!
+
 	for _, mut col in a.collections {
 		col.export(
 			destination: dest
@@ -27,9 +30,9 @@ pub fn (mut a Atlas) export(args ExportArgs) ! {
 			redis: args.redis
 		)!
 
-			// Print errors for this collection if any
-			if col.has_errors() {
-				col.print_errors()
-			}
+		// Print errors for this collection if any
+		if col.has_errors() {
+			col.print_errors()
+		}
 	}
 }
\ No newline at end of file
diff --git a/lib/data/atlas/link.v b/lib/data/atlas/link.v
new file mode 100644
index 00000000..e05aefb0
--- /dev/null
+++ b/lib/data/atlas/link.v
@@ -0,0 +1,258 @@
+module atlas
+
+import incubaid.herolib.core.texttools
+import incubaid.herolib.core.pathlib
+import os
+
+// Link represents a markdown link found in content
+pub struct Link {
+pub mut:
+	text       string // Link text [text]
+	target     string // Original link target (whitespace-trimmed)
+	line       int    // Line number (1-based)
+	col_start  int    // Offset of the opening `[` within the line
+	col_end    int    // Offset just past the closing `)` within the line
+	collection string // Target collection (if specified)
+	page       string // Target page name (normalized)
+	is_local   bool   // Whether link points to local page
+	valid      bool   // Whether link target exists (not set by find_links)
+}
+
+// find_links returns every inline markdown link `[text](target)` found in
+// content, in document order. It scans line by line, so a link that spans
+// several lines is not detected. Image embeds `![alt](src)` are skipped:
+// they reference files rather than pages and must not be validated or
+// rewritten as page links.
+pub fn find_links(content string) []Link {
+	mut links := []Link{}
+	lines := content.split_into_lines()
+
+	for line_idx, line in lines {
+		mut pos := 0
+		for {
+			// Find next [
+			open_bracket := line.index_after('[', pos) or { break }
+
+			// Find matching ]
+			close_bracket := line.index_after(']', open_bracket) or { break }
+
+			// Check for ( - a bracket pair without it is plain text, keep scanning
+			if close_bracket + 1 >= line.len || line[close_bracket + 1] != `(` {
+				pos = close_bracket + 1
+				continue
+			}
+
+			// Find matching )
+			open_paren := close_bracket + 1
+			close_paren := line.index_after(')', open_paren) or { break }
+
+			// Skip image embeds (`[` directly preceded by `!`)
+			if open_bracket > 0 && line[open_bracket - 1] == `!` {
+				pos = close_paren + 1
+				continue
+			}
+
+			// Extract link components
+			text := line[open_bracket + 1..close_bracket]
+			target := line[open_paren + 1..close_paren]
+
+			mut link := Link{
+				text:      text
+				target:    target.trim_space()
+				line:      line_idx + 1
+				col_start: open_bracket
+				col_end:   close_paren + 1
+			}
+
+			parse_link_target(mut link)
+			links << link
+
+			pos = close_paren + 1
+		}
+	}
+
+	return links
+}
+
+// parse_link_target classifies link.target and fills in is_local, collection
+// and page. http, https, mailto and ftp targets and pure `#anchor` targets
+// are left non-local. On a local target a trailing `#fragment` is ignored
+// for resolution, so `page#section` resolves to the page `page`.
+fn parse_link_target(mut link Link) {
+	mut target := link.target
+
+	// Skip external links
+	if target.starts_with('http://') || target.starts_with('https://')
+		|| target.starts_with('mailto:') || target.starts_with('ftp://') {
+		return
+	}
+
+	// Skip anchors
+	if target.starts_with('#') {
+		return
+	}
+
+	link.is_local = true
+
+	// Drop the in-page fragment; only the page part is used for resolution
+	if target.contains('#') {
+		target = target.all_before('#')
+	}
+
+	// Format: $collection:$pagename or $collection:$pagename.md
+	if target.contains(':') {
+		parts := target.split(':')
+		if parts.len >= 2 {
+			link.collection = texttools.name_fix(parts[0])
+			link.page = normalize_page_name(parts[1])
+		}
+		return
+	}
+
+	// For all other formats, extract filename from path (ignore path components)
+	// Handles: $page, path/to/$page, /path/to/$page, /path/to/$page.md
+	filename := os.base(target)
+	link.page = normalize_page_name(filename)
+}
+
+// Normalize page name (remove .md, apply name_fix)
+fn normalize_page_name(name string) string {
+	mut clean := name
+	if clean.ends_with('.md') {
+		clean = clean[0..clean.len - 3]
+	}
+	return texttools.name_fix(clean)
+}
+
+// validate_links reads the page content and records an
+// `.invalid_page_reference` error on the owning collection for every local
+// link whose target page is not known to the atlas. Links without a
+// collection prefix are resolved against this page's own collection.
+pub fn (mut p Page) validate_links() ! {
+	content := p.read_content()!
+	links := find_links(content)
+
+	for link in links {
+		if !link.is_local {
+			continue
+		}
+
+		// Determine target collection
+		mut target_collection := link.collection
+		if target_collection == '' {
+			target_collection = p.collection_name
+		}
+
+		// Check if page exists
+		page_key := '${target_collection}:${link.page}'
+		if !p.collection.atlas.page_exists(page_key) {
+			p.collection.error(
+				category:     .invalid_page_reference
+				page_key:     p.key()
+				message:      'Broken link to `${page_key}` at line ${link.line}: [${link.text}](${link.target})'
+				show_console: false
+			)
+		}
+	}
+}
+
+// fix_links returns content with every resolvable same-collection link
+// rewritten to the relative `.md` path from this page to its target page.
+// External, cross-collection and unresolvable links are left untouched. A
+// `#fragment` on the original target is carried over to the rewritten one.
+pub fn (mut p Page) fix_links(content string) !string {
+	links := find_links(content)
+	if links.len == 0 {
+		return content
+	}
+
+	mut result := content
+
+	// Replacement is textual rather than positional, so link order is irrelevant
+	for link in links {
+		if !link.is_local || link.page == '' {
+			continue
+		}
+
+		// Determine target collection
+		mut target_collection := link.collection
+		if target_collection == '' {
+			target_collection = p.collection_name
+		}
+
+		// Only fix links within same collection
+		if target_collection != p.collection_name {
+			continue
+		}
+
+		// Get target page
+		page_key := '${target_collection}:${link.page}'
+		mut target_page := p.collection.atlas.page_get(page_key) or {
+			// Skip if page doesn't exist - error already reported in validate
+			continue
+		}
+
+		// Calculate relative path
+		relative_path := calculate_relative_path(mut p.path, mut target_page.path)
+
+		// Keep the in-page fragment (e.g. `#install`) of the original target
+		anchor := if link.target.contains('#') { '#' + link.target.all_after('#') } else { '' }
+
+		// Build replacement
+		old_link := '[${link.text}](${link.target})'
+		new_link := '[${link.text}](${relative_path}${anchor})'
+		if new_link == old_link {
+			continue
+		}
+
+		// Replace in content
+		result = result.replace(old_link, new_link)
+	}
+
+	return result
+}
+
+// Calculate relative path from source file to target file with .md extension
+fn calculate_relative_path(mut from pathlib.Path, mut to pathlib.Path) string {
+	from_dir := from.path_dir()
+	to_dir := to.path_dir()
+	to_name := to.name_fix_no_ext()
+
+	// If in same directory, just return filename with .md
+	if from_dir == to_dir {
+		return '${to_name}.md'
+	}
+
+	// Split paths into parts
+	from_parts := from_dir.split(os.path_separator).filter(it != '')
+	to_parts := to_dir.split(os.path_separator).filter(it != '')
+
+	// Find common base
+	mut common_len := 0
+	for i := 0; i < from_parts.len && i < to_parts.len; i++ {
+		if from_parts[i] == to_parts[i] {
+			common_len = i + 1
+		} else {
+			break
+		}
+	}
+
+	// Build relative path
+	mut rel_parts := []string{}
+
+	// Add ../ for each directory we need to go up
+	up_count := from_parts.len - common_len
+	for _ in 0..up_count {
+		rel_parts << '..'
+	}
+
+	// Add path down to target
+	for i := common_len; i < to_parts.len; i++ {
+		rel_parts << to_parts[i]
+	}
+
+	// Add filename with .md extension
+	rel_parts << '${to_name}.md'
+
+	return rel_parts.join('/')
+}
\ No newline at end of file
diff --git a/lib/data/atlas/readme.md b/lib/data/atlas/readme.md
index a9e7800d..46b419b4 100644
--- a/lib/data/atlas/readme.md
+++ b/lib/data/atlas/readme.md
@@ -214,6 +214,121 @@ content := page.read_content()!
 
 Atlas automatically detects circular includes and reports them as errors without causing infinite loops.
 
+## Links
+
+Atlas supports standard Markdown links with several formats for referencing pages within collections.
+
+### Link Formats
+
+#### 1. Explicit Collection Reference
+Link to a page in a specific collection:
+```md
+[Click here](guides:introduction)
+[Click here](guides:introduction.md)
+```
+
+#### 2. Same Collection Reference
+Link to a page in the same collection (collection name omitted):
+```md
+[Click here](introduction)
+```
+
+#### 3. Path-Based Reference
+Link using a path - **only the filename is used** for matching:
+```md
+[Click here](some/path/introduction)
+[Click here](/absolute/path/introduction)
+[Click here](path/to/introduction.md)
+```
+
+**Important:** Paths are ignored during link resolution.
Only the page name (filename) is used to find the target page within the same collection.
+
+### Link Processing
+
+#### Validation
+
+Check all links in your Atlas:
+
+```v
+mut a := atlas.new()!
+a.scan(path: './docs')!
+
+// Validate all links
+a.validate_links()!
+
+// Check for errors
+for _, col in a.collections {
+	if col.has_errors() {
+		col.print_errors()
+	}
+}
+```
+
+#### Fixing Links
+
+Automatically rewrite links with correct relative paths:
+
+```v
+mut a := atlas.new()!
+a.scan(path: './docs')!
+
+// Fix all links in place
+a.fix_links()!
+
+// Or fix links in a specific collection
+mut col := a.get_collection('guides')!
+col.fix_links()!
+```
+
+**What `fix_links()` does:**
+- Finds all local page links
+- Calculates the correct relative path from the linking page to the target page
+- Rewrites links as `[text](relative/path/pagename.md)`
+- Only fixes links within the same collection
+- Preserves `!!include` actions unchanged
+- Writes a file back only when its content changed
+
+#### Example
+
+Before fix:
+```md
+# My Page
+
+[Introduction](introduction)
+[Setup](/some/old/path/setup)
+[Guide](guides:advanced)
+```
+
+After fix (assuming this page sits next to `setup.md`, with `introduction.md` in a sibling `intro/` directory):
+```md
+# My Page
+
+[Introduction](../intro/introduction.md)
+[Setup](setup.md)
+[Guide](guides:advanced)
+```
+
+### Link Rules
+
+1. **Name Normalization**: All page names are normalized using `name_fix()` (lowercase, underscores, etc.)
+2. **Same Collection Only**: `fix_links()` only rewrites links within the same collection
+3. **Cross-Collection Links**: Links with explicit collection references (e.g., `guides:page`) are validated but not rewritten
+4. **External Links**: HTTP(S), FTP, mailto, and anchor-only (`#section`) links are skipped: they are neither validated nor rewritten
+5. **Error Reporting**: Broken links are recorded as collection errors with the source page, line number, and the original link
+
+### Export with Link Validation
+
+Links are automatically validated during export:
+
+```v
+a.export(
+	destination: './output'
+	include: true
+)!
+
+// Errors are printed for each collection automatically
+```
+
 ## Redis Integration
 
 Atlas uses Redis to store metadata about collections, pages, images, and files for fast lookups and caching.