diff --git a/examples/data/regex_example.vsh b/examples/data/regex_example.vsh
new file mode 100755
index 00000000..f8fd7a65
--- /dev/null
+++ b/examples/data/regex_example.vsh
@@ -0,0 +1,37 @@
+#!/usr/bin/env -S v -n -w -gc none -cg -cc tcc -d use_openssl -enable-globals run
+
+// extract_image_markdown returns the (alt, url) pair of the first markdown image
+// link `![alt](url)` found in `s`, or an error naming the delimiter that is missing.
+fn extract_image_markdown(s string) !(string, string) {
+	start := s.index('![') or { return error('Missing ![') }
+	alt_start := start + 2
+	alt_end := s.index_after(']', alt_start) or { return error('Missing ]') }
+	if s.len <= alt_end + 1 || s[alt_end + 1] != `(` {
+		return error('Missing opening ( after ]')
+	}
+	url_start := alt_end + 2
+	url_end := s.index_after(')', url_start) or { return error('Missing closing )') }
+
+	alt := s[alt_start..alt_end]
+	url := s[url_start..url_end]
+	return alt, url
+}
+
+fn main() {
+	text := 'Here is an image: ![Alt](http://example.com/image.png) and another ![Logo](https://site.org/logo.svg)'
+
+	mut i := 0
+	for {
+		if i >= text.len { break }
+		if text[i..].contains('![') {
+			snippet := text[i..]
+			alt, url := extract_image_markdown(snippet) or {
+				break
+			}
+			println('Alt: "$alt" | URL: "$url"')
+			i += snippet.index_after(')', 0) or { break } + 1
+		} else {
+			break
+		}
+	}
+}
diff --git a/lib/web/doctreeclient/client.v b/lib/web/doctreeclient/client.v
index 59eaa42d..b4363c39 100644
--- a/lib/web/doctreeclient/client.v
+++ b/lib/web/doctreeclient/client.v
@@ -2,7 +2,10 @@ module doctreeclient
 
 import freeflowuniverse.herolib.core.pathlib
 import freeflowuniverse.herolib.core.texttools
+import freeflowuniverse.herolib.ui.console
 import os
+import regex
+
 
 // List of recognized image file extensions
 const image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.tiff', '.ico']
@@ -285,3 +288,70 @@ pub fn (mut c DocTreeClient) list_markdown() !string {
 	}
 	return markdown_output
 }
+
+// get_page_paths returns the path of a page and the paths of its linked images.
+// Returns (page_path, image_paths)
+// Linked images that cannot be resolved in the collection are logged and skipped,
+// so a single missing image does not fail the whole lookup.
+pub fn (mut c DocTreeClient) get_page_paths(collection_name string, page_name string) !(string, []string) {
+	// Get the page path
+	page_path := c.get_page_path(collection_name, page_name)!
+	page_content := c.get_page_content(collection_name, page_name)!
+
+	// Extract image names from the page content
+	image_names := extract_image_links(page_content)!
+
+	mut image_paths := []string{}
+	for image_name in image_names {
+		// Get the path for each image
+		image_path := c.get_image_path(collection_name, image_name) or {
+			// If an image is not found, log a warning and continue, don't fail the whole operation
+			console.print_debug('Warning: Linked image "${image_name}" not found in collection "${collection_name}". Skipping.')
+			continue
+		}
+		image_paths << image_path
+	}
+
+	return page_path, image_paths
+}
+
+// copy_page copies a page and its linked images to a specified destination.
+// The page file is copied into `destination_path` (created if missing) and every
+// resolved linked image is copied into its `images` subdirectory.
+pub fn (mut c DocTreeClient) copy_page(collection_name string, page_name string, destination_path string) ! {
+	console.print_debug('copy_page: Copying page "${page_name}" from collection "${collection_name}" to "${destination_path}"')
+	// Get the page path and linked image paths
+	page_path, image_paths := c.get_page_paths(collection_name, page_name)!
+
+	console.print_debug('copy_page: Page path: "${page_path}"')
+	console.print_debug('copy_page: Linked image paths: ${image_paths}')
+
+	// Ensure the destination directory exists
+	console.print_debug('copy_page: Ensuring destination directory "${destination_path}" exists.')
+	os.mkdir_all(destination_path)!
+	console.print_debug('copy_page: Destination directory created/exists.')
+
+	// Copy the page file
+	page_file_name := os.base(page_path)
+	dest_page_path := os.join_path(destination_path, page_file_name)
+	console.print_debug('copy_page: Copying page file from "${page_path}" to "${dest_page_path}"')
+	os.cp(page_path, dest_page_path)!
+	console.print_debug('copy_page: Page file copied.')
+
+	// Create an 'images' subdirectory within the destination
+	images_dest_path := os.join_path(destination_path, 'images')
+	console.print_debug('copy_page: Ensuring images directory "${images_dest_path}" exists.')
+	os.mkdir_all(images_dest_path)!
+	console.print_debug('copy_page: Images directory created/exists.')
+
+	// Copy each linked image
+	for image_path in image_paths {
+		image_file_name := os.base(image_path)
+		dest_image_path := os.join_path(images_dest_path, image_file_name)
+		console.print_debug('copy_page: Copying image file from "${image_path}" to "${dest_image_path}"')
+		os.cp(image_path, dest_image_path)!
+		console.print_debug('copy_page: Image file "${image_file_name}" copied.')
+	}
+	console.print_debug('copy_page: All files copied successfully.')
+}
+
diff --git a/lib/web/doctreeclient/extract_links.v b/lib/web/doctreeclient/extract_links.v
new file mode 100644
index 00000000..08cd945e
--- /dev/null
+++ b/lib/web/doctreeclient/extract_links.v
@@ -0,0 +1,34 @@
+module doctreeclient
+
+// extract_image_links returns the URL part of every markdown image link
+// (`![alt](url)`) found in `s`, in order of appearance.
+// A `![...]` that is not directly followed by `(` is skipped; scanning stops
+// at the first link that is missing its closing `]` or `)`.
+pub fn extract_image_links(s string) ![]string {
+	mut result := []string{}
+	mut current_pos := 0
+	for current_pos < s.len {
+		// Find the start of an image markdown link
+		start_index := s.index_after('![', current_pos) or { break }
+
+		// Find the closing bracket for alt text
+		alt_end_index := s.index_after(']', start_index) or { break }
+
+		// Check for opening parenthesis for URL
+		if alt_end_index + 1 >= s.len || s[alt_end_index + 1] != `(` {
+			current_pos = alt_end_index + 1 // Move past this invalid sequence
+			continue
+		}
+
+		// Find the closing parenthesis for URL
+		url_start_index := alt_end_index + 2
+		url_end_index := s.index_after(')', url_start_index) or { break }
+
+		// Extract the URL
+		result << s[url_start_index..url_end_index]
+
+		// Move current_pos past the found link to continue searching
+		current_pos = url_end_index + 1
+	}
+	return result
+}
diff --git a/lib/web/doctreeclient/extract_links_test.v b/lib/web/doctreeclient/extract_links_test.v
new file mode 100644
index 00000000..2610dadf
--- /dev/null
+++ b/lib/web/doctreeclient/extract_links_test.v
@@ -0,0 +1,78 @@
+module doctreeclient
+
+import os
+
+fn test_extract_image_links() {
+	// Test case 1: Basic case with one image link
+	mut result := extract_image_links('Some text ![Alt Text](https://example.com/image1.png) more text')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/image1.png'
+
+	// Test case 2: Multiple image links
+	result = extract_image_links('![Img1](https://example.com/img1.jpg) Text ![Img2](https://example.com/img2.gif)')!
+	assert result.len == 2
+	assert result[0] == 'https://example.com/img1.jpg'
+	assert result[1] == 'https://example.com/img2.gif'
+
+	// Test case 3: No image links
+	result = extract_image_links('Just some plain text without images.')!
+	assert result.len == 0
+
+	// Test case 4: Mixed content with other markdown
+	result = extract_image_links('A link [Link](https://example.com) and an image ![Photo](https://example.com/photo.jpeg).')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/photo.jpeg'
+
+	// Test case 5: Invalid image link (missing parenthesis)
+	result = extract_image_links('Invalid ![Broken Link]https://example.com/broken.png')!
+	assert result.len == 0
+
+	// Test case 6: Empty string
+	result = extract_image_links('')!
+	assert result.len == 0
+
+	// Test case 7: Image link at the beginning of the string
+	result = extract_image_links('![Start](https://example.com/start.png) Some text.')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/start.png'
+
+	// Test case 8: Image link at the end of the string
+	result = extract_image_links('Some text ![End](https://example.com/end.png)')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/end.png'
+
+	// Test case 9: Image link with spaces in URL (should not happen in valid markdown, but good to test robustness)
+	result = extract_image_links('![Space](https://example.com/image with spaces.png)')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/image with spaces.png'
+
+	// Test case 10: Image link with special characters in URL
+	result = extract_image_links('![Special](https://example.com/path/to/image?id=1&name=test.png)')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/path/to/image?id=1&name=test.png'
+
+	// Test case 11: Multiple image links without spaces in between
+	result = extract_image_links('![A](https://a.com)![B](https://b.com)![C](https://c.com)')!
+	assert result.len == 3
+	assert result[0] == 'https://a.com'
+	assert result[1] == 'https://b.com'
+	assert result[2] == 'https://c.com'
+
+	// Test case 12: Image link with empty alt text
+	result = extract_image_links('![](https://example.com/noalt.png)')!
+	assert result.len == 1
+	assert result[0] == 'https://example.com/noalt.png'
+
+	// Test case 13: Image link with empty URL (invalid markdown, but test behavior)
+	result = extract_image_links('![Empty URL]()')!
+	assert result.len == 1
+	assert result[0] == '' // Expecting an empty string for the URL
+
+	// Test case 14: Image link with only alt text and no URL part
+	result = extract_image_links('![Only Alt Text]')!
+	assert result.len == 0
+
+	// Test case 15: Image link with only URL part and no alt text
+	result = extract_image_links('()')!
+	assert result.len == 0
+}
\ No newline at end of file
diff --git a/lib/web/sitegen/site.v b/lib/web/sitegen/site.v
index 2f640bbf..798f6fa0 100644
--- a/lib/web/sitegen/site.v
+++ b/lib/web/sitegen/site.v
@@ -66,20 +66,22 @@ pub fn (mut site Site) page_add(args_ Page) !
{ collection_name := parts[0] page_name := parts[1] - mut page_content := site.client.get_page_content(collection_name, page_name) or { - return error("Couldn't find page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}") - } + // mut page_content := site.client.get_page_content(collection_name, page_name) or { + // return error("Couldn't find page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}") + // } - c+="\n${page_content}\n" + // c+="\n${page_content}\n" mut pagepath:= "${site.path.path}/${args.path}" - mut pagefile:= pathlib.get_file(path:pagepath,create:true)! - - console.print_debug("Writing page '${pagepath}'") - - pagefile.write(c)! + // mut pagefile:= pathlib.get_file(path:pagepath,create:true)! + // pagefile.write(c)! + console.print_debug("Copy page '${pagepath}' in collection '${collection_name}") + + site.client.copy_page(collection_name, page_name, pagepath) or { + return error("Couldn't copy page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}") + } }