This commit is contained in:
2025-11-28 09:01:58 +01:00
parent 60e2230448
commit 0414ea85df
18 changed files with 679 additions and 696 deletions

View File

@@ -17,8 +17,8 @@ AtlasClient provides methods to:
```v
import incubaid.herolib.web.atlas_client
// Create client
mut client := atlas_client.new(export_dir: '/tmp/atlas_export')!
// Create client; exports will be in $HOME/hero/var/atlas_export by default
mut client := atlas_client.new()!
// List collections
collections := client.list_collections()!

View File

@@ -247,20 +247,6 @@ pub fn (mut c AtlasClient) get_collection_metadata(collection_name string) !Coll
return metadata
}
// get_page_links returns the links recorded for a page in its collection metadata.
// The page name is normalized the same way the scanner normalizes it before lookup.
pub fn (mut c AtlasClient) get_page_links(collection_name string, page_name string) ![]LinkMetadata {
	metadata := c.get_collection_metadata(collection_name)!
	// Normalize the requested name so it matches the metadata keys
	fixed_page_name := texttools.name_fix_no_ext(page_name)
	if fixed_page_name !in metadata.pages {
		return error('page_not_found: Page "${page_name}" not found in collection metadata, for collection: "${collection_name}"')
	}
	return metadata.pages[fixed_page_name].links
}
// get_collection_errors returns the errors for a collection from metadata
pub fn (mut c AtlasClient) get_collection_errors(collection_name string) ![]ErrorMetadata {
metadata := c.get_collection_metadata(collection_name)!
@@ -273,6 +259,30 @@ pub fn (mut c AtlasClient) has_errors(collection_name string) bool {
return errors.len > 0
}
// copy_pages copies every internal page linked from the given page into destination_path.
// Non-page links are ignored, as are links marked external.
// NOTE(review): comments and local names previously said "image"/"img" — copy-paste
// leftovers from copy_images; this function handles page links only.
pub fn (mut c AtlasClient) copy_pages(collection_name string, page_name string, destination_path string) ! {
	// Resolve the page's links from collection metadata
	links := c.get_page_links(collection_name, page_name)!
	// Ensure the destination directory exists
	mut page_dest := pathlib.get_dir(path: destination_path, create: true)!
	// Copy only internal page links
	for link in links {
		if link.file_type != .page {
			continue
		}
		if link.status == .external {
			continue
		}
		// Resolve the linked page's source path and copy it over
		page_path := c.get_page_path(link.target_collection_name, link.target_item_name)!
		mut src := pathlib.get_file(path: page_path)!
		src.copy(dest: '${page_dest.path}/${src.name_fix_keepext()}')!
		console.print_debug(' ********. Copied page: ${src.path} to ${page_dest.path}/${src.name_fix_keepext()}')
	}
}
pub fn (mut c AtlasClient) copy_images(collection_name string, page_name string, destination_path string) ! {
// Get page links from metadata
links := c.get_page_links(collection_name, page_name)!

View File

@@ -0,0 +1,119 @@
module client
import incubaid.herolib.core.pathlib
import incubaid.herolib.core.texttools
import incubaid.herolib.ui.console
import os
import json
import incubaid.herolib.core.redisclient
// get_page_links collects every link reachable from a page, following
// page-to-page references transitively. Links to files, images, and
// external targets are included in the result but never recursed into.
pub fn (mut c AtlasClient) get_page_links(collection_name string, page_name string) ![]LinkMetadata {
	mut seen := map[string]bool{}
	mut collected := []LinkMetadata{}
	c.collect_page_links_recursive(collection_name, page_name, mut seen, mut collected)!
	return collected
}
// collect_page_links_recursive walks the link graph starting from one page,
// appending every link it encounters to all_links.
//
// Cycle handling: a key of the form '${collection}:${page}' is recorded in
// visited BEFORE recursing, so circular references (Page A -> Page B -> Page A)
// terminate. Each top-level get_page_links call owns its own visited map.
//
// Recursion rules:
// - .page links inside the atlas are followed
// - .file, .image and external links are collected but never followed
fn (mut c AtlasClient) collect_page_links_recursive(collection_name string, page_name string, mut visited map[string]bool, mut all_links []LinkMetadata) ! {
	page_key := '${collection_name}:${page_name}'
	if page_key in visited {
		// Already handled on an earlier path through the graph
		return
	}
	visited[page_key] = true
	metadata := c.get_collection_metadata(collection_name)!
	fixed_page_name := texttools.name_fix_no_ext(page_name)
	if fixed_page_name !in metadata.pages {
		return error('page_not_found: Page "${page_name}" not found in collection metadata, for collection: "${collection_name}"')
	}
	page_meta := metadata.pages[fixed_page_name]
	// Every direct link counts, whatever its type
	all_links << page_meta.links
	for link in page_meta.links {
		// Follow only internal page-to-page links; everything else is terminal
		if link.file_type != .page || link.status == .external {
			continue
		}
		// A broken target (missing from metadata) must not abort the whole walk:
		// the error from the recursive call is swallowed deliberately so the
		// remaining links are still processed (graceful degradation)
		c.collect_page_links_recursive(link.target_collection_name, link.target_item_name, mut visited, mut all_links) or { continue }
	}
}
// get_image_links returns every image link reachable from a page, including
// pages linked transitively. Thin wrapper that keeps only .image entries
// from get_page_links.
pub fn (mut c AtlasClient) get_image_links(collection_name string, page_name string) ![]LinkMetadata {
	all_links := c.get_page_links(collection_name, page_name)!
	return all_links.filter(it.file_type == .image)
}
// get_file_links returns every non-image file link reachable from a page,
// including pages linked transitively. Thin wrapper that keeps only .file
// entries from get_page_links.
pub fn (mut c AtlasClient) get_file_links(collection_name string, page_name string) ![]LinkMetadata {
	all_links := c.get_page_links(collection_name, page_name)!
	return all_links.filter(it.file_type == .file)
}
// get_page_link_targets returns every internal page-to-page link reachable
// from a page, including pages linked transitively. Thin wrapper that keeps
// only non-external .page entries from get_page_links.
pub fn (mut c AtlasClient) get_page_link_targets(collection_name string, page_name string) ![]LinkMetadata {
	all_links := c.get_page_links(collection_name, page_name)!
	return all_links.filter(it.file_type == .page && it.status != .external)
}

View File

@@ -7,7 +7,7 @@ import json
@[params]
pub struct ExportArgs {
pub mut:
destination string @[requireds]
destination string @[required]
reset bool = true
include bool = true
redis bool = true
@@ -90,6 +90,44 @@ pub fn (mut c Collection) export(args CollectionExportArgs) ! {
c.collect_cross_collection_references(mut page, mut cross_collection_pages, mut
cross_collection_files, mut processed_cross_pages)!
// println('------- ${c.name} ${page.key()}')
// if page.key() == 'geoaware:solution' && c.name == 'mycelium_nodes_tiers' {
// println(cross_collection_pages)
// println(cross_collection_files)
// // println(processed_cross_pages)
// $dbg;
// }
// copy the pages to the right exported path
for _, mut ref_page in cross_collection_pages {
mut src_file := ref_page.path()!
mut subdir_path := pathlib.get_dir(
path: '${col_dir.path}'
create: true
)!
mut dest_path := '${subdir_path.path}/${ref_page.name}.md'
src_file.copy(dest: dest_path)!
// println(dest_path)
// $dbg;
}
// copy the files to the right exported path
for _, mut ref_file in cross_collection_files {
mut src_file2 := ref_file.path()!
// Determine subdirectory based on file type
mut subdir := if ref_file.is_image() { 'img' } else { 'files' }
// Ensure subdirectory exists
mut subdir_path := pathlib.get_dir(
path: '${col_dir.path}/${subdir}'
create: true
)!
mut dest_path := '${subdir_path.path}/${ref_file.name}'
mut dest_file2 := pathlib.get_file(path: dest_path, create: true)!
src_file2.copy(dest: dest_file2.path)!
}
processed_local_pages[page.name] = true
// Redis operations...
@@ -117,65 +155,6 @@ pub fn (mut c Collection) export(args CollectionExportArgs) ! {
mut dest_file := pathlib.get_file(path: dest_path, create: true)!
src_file.copy(dest: dest_file.path)!
}
// Second pass: copy all collected cross-collection pages and process their links recursively
// Keep iterating until no new cross-collection references are found
for {
mut found_new_references := false
// Process all cross-collection pages we haven't processed yet
for page_key, mut ref_page in cross_collection_pages {
if page_key in processed_cross_pages {
continue // Already processed this page's links
}
// Mark as processed to avoid infinite loops
processed_cross_pages[page_key] = true
found_new_references = true
// Get the referenced page content with includes processed
ref_content := ref_page.content_with_fixed_links(
include: args.include
cross_collection: true
export_mode: true
)!
// Write the referenced page to this collection's directory
mut dest_file := pathlib.get_file(
path: '${col_dir.path}/${ref_page.name}.md'
create: true
)!
dest_file.write(ref_content)!
// CRITICAL: Recursively process links in this cross-collection page
// This ensures we get pages/files/images referenced by ref_page
c.collect_cross_collection_references(mut ref_page, mut cross_collection_pages, mut
cross_collection_files, mut processed_cross_pages)!
}
// If we didn't find any new references, we're done with the recursive pass
if !found_new_references {
break
}
}
// Third pass: copy ALL collected cross-collection referenced files/images
for _, mut ref_file in cross_collection_files {
mut src_file := ref_file.path()!
// Determine subdirectory based on file type
mut subdir := if ref_file.is_image() { 'img' } else { 'files' }
// Ensure subdirectory exists
mut subdir_path := pathlib.get_dir(
path: '${col_dir.path}/${subdir}'
create: true
)!
mut dest_path := '${subdir_path.path}/${ref_file.name}'
mut dest_file := pathlib.get_file(path: dest_path, create: true)!
src_file.copy(dest: dest_file.path)!
}
}
// Helper function to recursively collect cross-collection references
@@ -184,6 +163,17 @@ fn (mut c Collection) collect_cross_collection_references(mut page Page,
mut all_cross_pages map[string]&Page,
mut all_cross_files map[string]&File,
mut processed_pages map[string]bool) ! {
page_key := page.key()
// If we've already processed this page, skip it (prevents infinite loops with cycles)
if page_key in processed_pages {
return
}
// Mark this page as processed BEFORE recursing (prevents infinite loops with circular references)
processed_pages[page_key] = true
// Process all links in the current page
// Use cached links from validation (before transformation) to preserve collection info
for mut link in page.links {
if link.status != .found {
@@ -192,15 +182,19 @@ fn (mut c Collection) collect_cross_collection_references(mut page Page,
is_local := link.target_collection_name == c.name
// Collect cross-collection page references
// Collect cross-collection page references and recursively process them
if link.file_type == .page && !is_local {
page_key := '${link.target_collection_name}:${link.target_item_name}'
page_ref := '${link.target_collection_name}:${link.target_item_name}'
// Only add if not already collected
if page_key !in all_cross_pages {
if page_ref !in all_cross_pages {
mut target_page := link.target_page()!
all_cross_pages[page_key] = target_page
// Don't mark as processed yet - we'll do that when we actually process its links
all_cross_pages[page_ref] = target_page
// Recursively process the target page's links to find more cross-collection references
// This ensures we collect ALL transitive cross-collection page and file references
c.collect_cross_collection_references(mut target_page, mut all_cross_pages, mut
all_cross_files, mut processed_pages)!
}
}

View File

@@ -1,15 +0,0 @@
in atlas/
check format of groups
see content/groups
now the groups end with .group
check how the include works, so we can include another group inside a group as defined; this only works within the same folder
in the scan function in atlas, now make scan_groups function, find groups, only do this for collection as named groups
do not add collection groups to atlas, this is a system collection
make the groups and add them to atlas
give clear instructions for coding agent how to write the code

View File

@@ -3,6 +3,7 @@ module atlas
import incubaid.herolib.core.playbook { PlayBook }
import incubaid.herolib.develop.gittools
import incubaid.herolib.ui.console
import os
// Play function to process HeroScript actions for Atlas
pub fn play(mut plbook PlayBook) ! {
@@ -66,7 +67,7 @@ pub fn play(mut plbook PlayBook) ! {
for mut action in export_actions {
mut p := action.params
name = p.get_default('name', 'main')!
destination := p.get_default('destination', '/tmp/atlas_export')!
destination := p.get_default('destination', '${os.home_dir()}/hero/var/atlas_export')!
reset := p.get_default_true('reset')
include := p.get_default_true('include')
redis := p.get_default_true('redis')

View File

@@ -1,4 +0,0 @@
- first find all pages
- then for each page find all links

View File

@@ -33,7 +33,7 @@ put in .hero file and execute with hero, or put a shebang line on top of the .hero script
!!atlas.scan git_url:"https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/collections/tests"
!!atlas.export destination: '/tmp/atlas_export'
!!atlas.export
```