This commit is contained in:
2025-07-19 19:02:18 +02:00
parent b9eb75d13e
commit e798187b89
5 changed files with 235 additions and 9 deletions

35
examples/data/regex_example.vsh Executable file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env -S v -n -w -gc none -cg -cc tcc -d use_openssl -enable-globals run
// extract_image_markdown returns the alt text and the URL of the first
// markdown image (`![alt](url)`) found in `s`.
// An error is returned when the `![` marker, the closing `]`, the `(` that
// must directly follow it, or the closing `)` cannot be found.
fn extract_image_markdown(s string) !(string, string) {
	marker_pos := s.index('![') or { return error('Missing ![') }
	alt_from := marker_pos + 2
	alt_to := s.index_after(']', alt_from) or { return error('Missing ]') }
	// The URL part has to open immediately after the closing bracket.
	paren_pos := alt_to + 1
	if paren_pos >= s.len || s[paren_pos] != `(` {
		return error('Missing opening ( after ]')
	}
	url_from := paren_pos + 1
	url_to := s.index_after(')', url_from) or { return error('Missing closing )') }
	return s[alt_from..alt_to], s[url_from..url_to]
}
// main walks through a sample text and prints the alt text and URL of every
// markdown image link it contains.
fn main() {
	text := 'Here is an image: ![Alt](http://example.com/image.png) and another ![Logo](https://site.org/logo.svg)'
	mut i := 0
	for i < text.len {
		snippet := text[i..]
		// Stop as soon as no image marker is left in the remaining text.
		link_start := snippet.index('![') or { break }
		alt, url := extract_image_markdown(snippet) or { break }
		println('Alt: "$alt" | URL: "$url"')
		// Advance exactly past the link that was just extracted:
		// `![` + alt + `](` + url + `)`. Searching for the first `)` in the
		// snippet instead would re-process the same image whenever a `)`
		// appears before the link.
		i += link_start + 2 + alt.len + 2 + url.len + 1
	}
}

View File

@@ -2,7 +2,10 @@ module doctreeclient
import freeflowuniverse.herolib.core.pathlib
import freeflowuniverse.herolib.core.texttools
import freeflowuniverse.herolib.ui.console
import os
import regex
// List of recognized image file extensions.
// Every entry is lowercase and includes the leading dot.
const image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.tiff', '.ico']
@@ -285,3 +288,67 @@ pub fn (mut c DocTreeClient) list_markdown() !string {
}
return markdown_output
}
// get_page_paths returns the path of a page and the paths of its linked images.
// Returns (page_path, image_paths).
// Fails when the page path or page content cannot be retrieved, or when link
// extraction fails. An image link that cannot be resolved in
// `collection_name` is logged and skipped, so a single missing image does not
// fail the whole operation.
pub fn (mut c DocTreeClient) get_page_paths(collection_name string, page_name string) !(string, []string) {
	// Get the page path
	page_path := c.get_page_path(collection_name, page_name)!
	page_content := c.get_page_content(collection_name, page_name)!
	// Extract image names from the page content
	image_names := extract_image_links(page_content)!
	mut image_paths := []string{}
	for image_name in image_names {
		// Get the path for each image
		image_path := c.get_image_path(collection_name, image_name) or {
			// If an image is not found, log a warning and continue, don't fail the whole operation
			console.print_debug('Warning: Linked image "${image_name}" not found in collection "${collection_name}". Skipping.')
			continue
		}
		image_paths << image_path
	}
	return page_path, image_paths
}
// copy_page copies a page and its linked images to a specified destination.
// `destination_path` is treated as a directory and is created when missing.
// The page file is copied directly into it; the linked images are copied into
// an `images` subdirectory. Files keep their base names.
// Fails when the page paths cannot be resolved, a directory cannot be
// created, or a copy fails.
pub fn (mut c DocTreeClient) copy_page(collection_name string, page_name string, destination_path string) ! {
	console.print_debug('copy_page: Copying page "${page_name}" from collection "${collection_name}" to "${destination_path}"')
	// Get the page path and linked image paths
	page_path, image_paths := c.get_page_paths(collection_name, page_name)!
	console.print_debug('copy_page: Page path: "${page_path}"')
	console.print_debug('copy_page: Linked image paths: ${image_paths}')
	// Ensure the destination directory exists
	console.print_debug('copy_page: Ensuring destination directory "${destination_path}" exists.')
	os.mkdir_all(destination_path)!
	console.print_debug('copy_page: Destination directory created/exists.')
	// Copy the page file
	page_file_name := os.base(page_path)
	dest_page_path := os.join_path(destination_path, page_file_name)
	console.print_debug('copy_page: Copying page file from "${page_path}" to "${dest_page_path}"')
	os.cp(page_path, dest_page_path)!
	console.print_debug('copy_page: Page file copied.')
	// Create an 'images' subdirectory within the destination
	images_dest_path := os.join_path(destination_path, 'images')
	console.print_debug('copy_page: Ensuring images directory "${images_dest_path}" exists.')
	os.mkdir_all(images_dest_path)!
	console.print_debug('copy_page: Images directory created/exists.')
	// Copy each linked image
	for image_path in image_paths {
		image_file_name := os.base(image_path)
		dest_image_path := os.join_path(images_dest_path, image_file_name)
		console.print_debug('copy_page: Copying image file from "${image_path}" to "${dest_image_path}"')
		os.cp(image_path, dest_image_path)!
		console.print_debug('copy_page: Image file "${image_file_name}" copied.')
	}
	console.print_debug('copy_page: All files copied successfully.')
}

View File

@@ -0,0 +1,44 @@
module doctreeclient
// extract_image_links scans `s` for markdown image links (`![alt](url)`) and
// returns the URL part of each one, in order of appearance.
// A `![...]` that is not directly followed by `(` is skipped; scanning stops
// at the first link whose closing `]` or `)` is missing.
pub fn extract_image_links(s string) ![]string {
	mut urls := []string{}
	mut pos := 0
	for pos < s.len {
		// Next image marker; none left means we are done.
		marker := s.index_after('![', pos) or { break }
		// Closing bracket of the alt text.
		bracket := s.index_after(']', marker) or { break }
		// The URL must open right after the closing bracket.
		paren := bracket + 1
		if paren >= s.len || s[paren] != `(` {
			// Not an image link after all: resume just past the bracket.
			pos = paren
			continue
		}
		url_from := paren + 1
		url_to := s.index_after(')', url_from) or { break }
		urls << s[url_from..url_to]
		// Continue searching after the closing parenthesis.
		pos = url_to + 1
	}
	return urls
}

View File

@@ -0,0 +1,78 @@
module doctreeclient
import os
// test_extract_image_links checks extract_image_links against well-formed,
// malformed and edge-case markdown inputs.
// The function takes no parameters and returns `!`: V only accepts test
// functions without parameters, and the result type is needed to propagate
// errors with `!`.
fn test_extract_image_links() ! {
	// Test case 1: Basic case with one image link
	mut result := extract_image_links('Some text ![Alt Text](https://example.com/image1.png) more text')!
	assert result.len == 1
	assert result[0] == 'https://example.com/image1.png'
	// Test case 2: Multiple image links
	result = extract_image_links('![Img1](https://example.com/img1.jpg) Text ![Img2](https://example.com/img2.gif)')!
	assert result.len == 2
	assert result[0] == 'https://example.com/img1.jpg'
	assert result[1] == 'https://example.com/img2.gif'
	// Test case 3: No image links
	result = extract_image_links('Just some plain text without images.')!
	assert result.len == 0
	// Test case 4: Mixed content with other markdown
	result = extract_image_links('A link [Link](https://example.com) and an image ![Photo](https://example.com/photo.jpeg).')!
	assert result.len == 1
	assert result[0] == 'https://example.com/photo.jpeg'
	// Test case 5: Invalid image link (missing parenthesis)
	result = extract_image_links('Invalid ![Broken Link]https://example.com/broken.png')!
	assert result.len == 0
	// Test case 6: Empty string
	result = extract_image_links('')!
	assert result.len == 0
	// Test case 7: Image link at the beginning of the string
	result = extract_image_links('![Start](https://example.com/start.png) Some text.')!
	assert result.len == 1
	assert result[0] == 'https://example.com/start.png'
	// Test case 8: Image link at the end of the string
	result = extract_image_links('Some text ![End](https://example.com/end.png)')!
	assert result.len == 1
	assert result[0] == 'https://example.com/end.png'
	// Test case 9: Image link with spaces in URL (should not happen in valid markdown, but good to test robustness)
	result = extract_image_links('![Space](https://example.com/image with spaces.png)')!
	assert result.len == 1
	assert result[0] == 'https://example.com/image with spaces.png'
	// Test case 10: Image link with special characters in URL
	result = extract_image_links('![Special](https://example.com/path/to/image?id=1&name=test.png)')!
	assert result.len == 1
	assert result[0] == 'https://example.com/path/to/image?id=1&name=test.png'
	// Test case 11: Multiple image links without spaces in between
	result = extract_image_links('![A](https://a.com)![B](https://b.com)![C](https://c.com)')!
	assert result.len == 3
	assert result[0] == 'https://a.com'
	assert result[1] == 'https://b.com'
	assert result[2] == 'https://c.com'
	// Test case 12: Image link with empty alt text
	result = extract_image_links('![](https://example.com/noalt.png)')!
	assert result.len == 1
	assert result[0] == 'https://example.com/noalt.png'
	// Test case 13: Image link with empty URL (invalid markdown, but test behavior)
	result = extract_image_links('![Empty URL]()')!
	assert result.len == 1
	assert result[0] == '' // Expecting an empty string for the URL
	// Test case 14: Image link with only alt text and no URL part
	result = extract_image_links('![Only Alt Text]')!
	assert result.len == 0
	// Test case 15: Parentheses only, without any `![alt]` part
	result = extract_image_links('()')!
	assert result.len == 0
}

View File

@@ -66,20 +66,22 @@ pub fn (mut site Site) page_add(args_ Page) ! {
collection_name := parts[0]
page_name := parts[1]
mut page_content := site.client.get_page_content(collection_name, page_name) or {
return error("Couldn't find page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}")
}
// mut page_content := site.client.get_page_content(collection_name, page_name) or {
// return error("Couldn't find page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}")
// }
c+="\n${page_content}\n"
// c+="\n${page_content}\n"
mut pagepath:= "${site.path.path}/${args.path}"
mut pagefile:= pathlib.get_file(path:pagepath,create:true)!
console.print_debug("Writing page '${pagepath}'")
pagefile.write(c)!
// mut pagefile:= pathlib.get_file(path:pagepath,create:true)!
// pagefile.write(c)!
console.print_debug("Copy page '${pagepath}' in collection '${collection_name}")
site.client.copy_page(collection_name, page_name, pagepath) or {
return error("Couldn't copy page '${page_name}' in collection '${collection_name}' using doctreeclient. Available pages:\n${site.client.list_markdown()!}\nError: ${err}")
}
}