diff --git a/doctree_implementation_plan.md b/doctree_implementation_plan.md deleted file mode 100644 index 317d637..0000000 --- a/doctree_implementation_plan.md +++ /dev/null @@ -1,258 +0,0 @@ -# Implementation Plan: DocTree Collection Scanner - -## Overview - -We need to expand the doctree library to: -1. Add a recursive scan function to the DocTree struct -2. Detect directories containing `.collection` files -3. Parse `.collection` files as TOML to extract collection names -4. Replace the current `name_fix` function with the one from the sal library -5. Populate collections with all files found under the collection directories - -## Detailed Implementation Plan - -### 1. Update Dependencies - -First, we need to add the necessary dependencies to the Cargo.toml file: - -```toml -[dependencies] -walkdir = "2.3.3" -pulldown-cmark = "0.9.3" -thiserror = "1.0.40" -lazy_static = "1.4.0" -toml = "0.7.3" # Add TOML parsing support -``` - -### 2. Replace the name_fix Function - -Replace the current `name_fix` function in `utils.rs` with the one from the sal library: - -```rust -pub fn name_fix(text: &str) -> String { - let mut result = String::with_capacity(text.len()); - - let mut last_was_underscore = false; - for c in text.chars() { - // Keep only ASCII characters - if c.is_ascii() { - // Replace specific characters with underscore - if c.is_whitespace() || c == ',' || c == '-' || c == '"' || c == '\'' || - c == '#' || c == '!' || c == '(' || c == ')' || c == '[' || c == ']' || - c == '=' || c == '+' || c == '<' || c == '>' || c == '@' || c == '$' || - c == '%' || c == '^' || c == '&' || c == '*' { - // Only add underscore if the last character wasn't an underscore - if !last_was_underscore { - result.push('_'); - last_was_underscore = true; - } - } else { - // Add the character as is (will be converted to lowercase later) - result.push(c); - last_was_underscore = false; - } - } - // Non-ASCII characters are simply skipped - } - - // Convert to lowercase - return result.to_lowercase(); -} -``` - -### 3. Add Collection Configuration Struct - -Create a new struct to represent the configuration found in `.collection` files: - -```rust -#[derive(Deserialize, Default)] -struct CollectionConfig { - name: Option<String>, - // Add other configuration options as needed -} -``` - -### 4.
Add Scan Collections Method to DocTree - -Add a new method to the DocTree struct to recursively scan directories for `.collection` files: - -```rust -impl DocTree { - /// Recursively scan directories for .collection files and add them as collections - /// - /// # Arguments - /// - /// * `root_path` - The root path to start scanning from - /// - /// # Returns - /// - /// Ok(()) on success or an error - pub fn scan_collections<P: AsRef<Path>>(&mut self, root_path: P) -> Result<()> { - let root_path = root_path.as_ref(); - - // Walk through the directory tree - for entry in WalkDir::new(root_path).follow_links(true) { - let entry = match entry { - Ok(entry) => entry, - Err(e) => { - eprintln!("Error walking directory: {}", e); - continue; - } - }; - - // Skip non-directories - if !entry.file_type().is_dir() { - continue; - } - - // Check if this directory contains a .collection file - let collection_file_path = entry.path().join(".collection"); - if collection_file_path.exists() { - // Found a collection directory - let dir_path = entry.path(); - - // Get the directory name as a fallback collection name - let dir_name = dir_path.file_name() - .and_then(|name| name.to_str()) - .unwrap_or("unnamed"); - - // Try to read and parse the .collection file - let collection_name = match fs::read_to_string(&collection_file_path) { - Ok(content) => { - // Parse as TOML - match toml::from_str::<CollectionConfig>(&content) { - Ok(config) => { - // Use the name from config if available, otherwise use directory name - config.name.unwrap_or_else(|| dir_name.to_string()) - }, - Err(e) => { - eprintln!("Error parsing .collection file at {:?}: {}", collection_file_path, e); - dir_name.to_string() - } - } - }, - Err(e) => { - eprintln!("Error reading .collection file at {:?}: {}", collection_file_path, e); - dir_name.to_string() - } - }; - - // Add the collection to the DocTree - match self.add_collection(dir_path, &collection_name) { - Ok(_) => { - println!("Added collection '{}' from {:?}", collection_name, dir_path); - }, - Err(e) => { - eprintln!("Error adding collection '{}' from {:?}: {}", collection_name, dir_path, e); - } - } - } - } - - Ok(()) - } -} -``` - -### 5. Update the DocTreeBuilder - -Update the DocTreeBuilder to include a method for scanning collections: - -```rust -impl DocTreeBuilder { - /// Scan for collections in the given root path - /// - /// # Arguments - /// - /// * `root_path` - The root path to scan for collections - /// - /// # Returns - /// - /// Self for method chaining or an error - pub fn scan_collections<P: AsRef<Path>>(self, root_path: P) -> Result<Self> { - // Ensure storage is set - let storage = self.storage.as_ref().ok_or_else(|| { - DocTreeError::MissingParameter("storage".to_string()) - })?; - - // Create a temporary DocTree to scan collections - let mut temp_doctree = DocTree { - collections: HashMap::new(), - default_collection: None, - storage: storage.clone(), - name: self.name.clone().unwrap_or_default(), - path: self.path.clone().unwrap_or_else(|| PathBuf::from("")), - }; - - // Scan for collections - temp_doctree.scan_collections(root_path)?; - - // Create a new builder with the scanned collections - let mut new_builder = self; - for (name, collection) in temp_doctree.collections { - new_builder.collections.insert(name, collection); - } - - Ok(new_builder) - } -} -``` - -### 6.
Add a Convenience Function to the Library - -Add a convenience function to the library for creating a DocTree by scanning a directory: - -```rust -/// Create a new DocTree by scanning a directory for collections -/// -/// # Arguments -/// -/// * `root_path` - The root path to scan for collections -/// -/// # Returns -/// -/// A new DocTree or an error -pub fn from_directory<P: AsRef<Path>>(root_path: P) -> Result<DocTree> { - let storage = RedisStorage::new("redis://localhost:6379")?; - - DocTree::builder() - .with_storage(storage) - .scan_collections(root_path)? - .build() -} -``` - -## Implementation Flow Diagram - -```mermaid -flowchart TD - A[Start] --> B[Update Dependencies] - B --> C[Replace name_fix function] - C --> D[Add CollectionConfig struct] - D --> E[Add scan_collections method to DocTree] - E --> F[Update DocTreeBuilder] - F --> G[Add convenience function] - G --> H[End] -``` - -## Component Interaction Diagram - -```mermaid -graph TD - A[DocTree] -->|manages| B[Collections] - C[scan_collections] -->|finds| D[.collection files] - D -->|parsed as| E[TOML] - E -->|extracts| F[Collection Name] - C -->|creates| B - G[name_fix] -->|processes| F - G -->|processes| H[File Names] - B -->|contains| H -``` - -## Testing Plan - -1. Create test directories with `.collection` files in various formats -2. Test the scan_collections method with these directories -3. Verify that collections are created correctly with the expected names -4. Verify that all files under the collection directories are included in the collections -5. Test edge cases such as empty `.collection` files, invalid TOML, etc. \ No newline at end of file diff --git a/doctree_ipfs_export_plan.md b/doctree_ipfs_export_plan.md deleted file mode 100644 index 02f5a98..0000000 --- a/doctree_ipfs_export_plan.md +++ /dev/null @@ -1,89 +0,0 @@ -# Implementation Plan: Exporting DocTree Collections to IPFS - -**Objective:** Add functionality to the `doctree` library to export files and images from collections to IPFS, encrypting them using Blake3 hash as the key and ChaCha20Poly1305, and generating a CSV manifest. - -**Dependencies:** - -We will need to add the following dependencies to the `[dependencies]` section of `doctree/Cargo.toml`: - -* `ipfs-api = "0.17.0"`: For interacting with the IPFS daemon. -* `chacha20poly1305 = "0.10.1"`: For symmetric encryption. -* `blake3 = "1.3.1"`: For calculating Blake3 hashes. -* `csv = "1.1"`: For writing the CSV manifest file. -* `walkdir = "2.3.2"`: Already a dependency, but will be used for iterating through collection files. -* `tokio = { version = "1", features = ["full"] }`: `ipfs-api` requires an async runtime. - -**Plan:** - -1. **Modify `doctree/Cargo.toml`:** Add the new dependencies. - - ```toml - [dependencies] - # Existing dependencies... - ipfs-api = "0.17.0" - chacha20poly1305 = "0.10.1" - blake3 = "1.3.1" - csv = "1.1" - walkdir = "2.3.2" - tokio = { version = "1", features = ["full"] } - ``` - -2. **Implement `export_to_ipfs` method in `doctree/src/collection.rs`:** - - * Add necessary imports: `std::path::PathBuf`, `std::fs`, `blake3`, `chacha20poly1305::ChaCha20Poly1305`, `chacha20poly1305::aead::Aead`, `chacha20poly1305::aead::NewAead`, `rust_ipfs::Ipfs`, `rust_ipfs::IpfsPath`, `tokio`, `csv`. - * Define an `async` method `export_to_ipfs` on the `Collection` struct. This method will take the output CSV file path as an argument. - * Inside the method, create a `csv::Writer` to write the manifest. - * Use `walkdir::WalkDir` to traverse the collection's directory (`self.path`).
- * Filter out directories and the `.collection` file. - * For each file: - * Read the file content. - * Calculate the Blake3 hash of the content. - * Use the first 32 bytes of the Blake3 hash as the key for `ChaCha20Poly1305`. Generate a random nonce. - * Encrypt the file content using `ChaCha20Poly1305`. - * Connect to the local IPFS daemon using `ipfs-api`. - * Add the encrypted content to IPFS. - * Get the IPFS hash and the size of the original file. - * Write a record to the CSV file with: `self.name`, filename (relative to collection path), Blake3 hash (hex encoded), IPFS hash, and original file size. - * Handle potential errors during file reading, hashing, encryption, IPFS interaction, and CSV writing. - -3. **Implement `export_collections_to_ipfs` method in `doctree/src/doctree.rs`:** - - * Add necessary imports: `tokio`. - * Define an `async` method `export_collections_to_ipfs` on the `DocTree` struct. This method will take the output CSV directory path as an argument. - * Inside the method, iterate through the `self.collections` HashMap. - * For each collection, construct the output CSV file path (e.g., `output_dir/collection_name.csv`). - * Call the `export_to_ipfs` method on the collection, awaiting the result. - * Handle potential errors from the collection export. - -4. **Export the new methods:** Make the new methods public in `doctree/src/lib.rs`. - - ```rust - // Existing exports... - pub use doctree::{DocTree, DocTreeBuilder, new, from_directory}; - ``` - - should become: - - ```rust - // Existing exports... - pub use doctree::{DocTree, DocTreeBuilder, new, from_directory, export_collections_to_ipfs}; - pub use collection::export_to_ipfs; // Assuming you want to expose the collection method as well - ``` - -**Mermaid Diagram:** - -```mermaid -graph TD - A[DocTree] --> B{Iterate Collections}; - B --> C[Collection]; - C --> D{Iterate Files/Images}; - D --> E[Read File Content]; - E --> F[Calculate Blake3 Hash]; - F --> G[Encrypt Content (ChaCha20Poly1305)]; - G --> H[Add Encrypted Content to IPFS]; - H --> I[Get IPFS Hash and Size]; - I --> J[Write Record to CSV]; - J --> D; - D --> C; - C --> B; - B --> K[CSV Manifest Files]; \ No newline at end of file diff --git a/examples/doctreenew/sites/demo1/collection.hjson b/examples/doctreenew/sites/demo1/collection.hjson index e5c842c..af8d414 100644 --- a/examples/doctreenew/sites/demo1/collection.hjson +++ b/examples/doctreenew/sites/demo1/collection.hjson @@ -8,13 +8,17 @@ { name: biz - url: https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/aibox/collectios/aaa + url: https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/aibox/collections/aaa description: Business documentation. } { name: products - url: https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/aibox/collectios/vvv + url: https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/aibox/collections/vvv description: Information about ThreeFold products. } + { + scan: true + url: https://git.ourworld.tf/tfgrid/docs_tfgrid4/src/branch/main/aibox/collections + } ] diff --git a/examples/doctreenew/sites/demo1/pages.hjson b/examples/doctreenew/sites/demo1/pages/mypages1.hjson similarity index 72% rename from examples/doctreenew/sites/demo1/pages.hjson rename to examples/doctreenew/sites/demo1/pages/mypages1.hjson index 7502009..df0c3ae 100644 --- a/examples/doctreenew/sites/demo1/pages.hjson +++ b/examples/doctreenew/sites/demo1/pages/mypages1.hjson @@ -3,30 +3,30 @@ name: home title: Home Page description: This is the main landing page. 
- navPath: / + navpath: / collection: acollection } { name: about title: About Us - navPath: /about + navpath: /about collection: acollection } { name: docs title: Documentation - navPath: /sub/docs + navpath: /sub/docs collection: docs_hero } { - name: hidden-page - title: Hidden Page + name: draft-page + title: Draft Page description: This page is not shown in navigation. - hidden: true - navPath: /cantsee + draft: true + navpath: /cantsee collection: acollection } ] diff --git a/webbuilder/Cargo.toml b/webbuilder/Cargo.toml new file mode 100644 index 0000000..c70d03c --- /dev/null +++ b/webbuilder/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "webbuilder" +version = "0.1.0" +edition = "2024" +[lib] +path = "src/lib.rs" + + +[dependencies] +walkdir = "2.3.3" +pulldown-cmark = "0.9.3" +thiserror = "1.0.40" +lazy_static = "1.4.0" +toml = "0.7.3" +serde = { version = "1.0", features = ["derive"] } +redis = { version = "0.23.0", features = ["tokio-comp"] } +tokio = { version = "1.28.0", features = ["full"] } +sal = { git = "https://git.ourworld.tf/herocode/sal.git", branch = "main" } +chacha20poly1305 = "0.10.1" +blake3 = "1.3.1" +csv = "1.1" +rand = "0.9.1" +ipfs-api-backend-hyper = "0.6" +ipfs-api = { version = "0.17.0", default-features = false, features = ["with-hyper-tls"] } diff --git a/webbuilder/src/builder/specs.md b/webbuilder/src/builder/specs.md new file mode 100644 index 0000000..89735f0 --- /dev/null +++ b/webbuilder/src/builder/specs.md @@ -0,0 +1,87 @@ +# Web Builder Specification + +This document describes the process of building web metadata and exporting assets for a website, resulting in a `webmeta.json` file that can be used by a browser-based website generator. + +## Overview + +The web building process starts with a directory containing the site's Hjson configuration files, such as the example directory `/Users/despiegk/code/git.ourworld.tf/herocode/doctree/examples/doctreenew/sites/demo1`. These Hjson files define the structure and content of the entire site and may reference external collections. The Hjson configuration sits "on top" of the collections it utilizes. Using the metadata defined in these Hjson files, the necessary collection data is downloaded from Git repositories (if referenced). The `doctree` library is then used to process the relevant data, identify pages and images, and prepare them for export to IPFS. Finally, a `webmeta.json` file is generated containing all the necessary information, including IPFS keys and Blake hashes for content verification, allowing a browser-based tool to render the website by fetching assets from IPFS. Optionally, the generated `webmeta.json` file can also be uploaded to IPFS, and its IPFS URL returned. + +## Process Steps + +1. **Start from Hjson Directory:** + * The process begins with a designated directory containing the site's Hjson configuration files. This directory serves as the single input for the web building process. + +2. **Parse Site Metadata (Hjson):** + * Locate and parse all `.hjson` files within the input directory and its subdirectories (e.g., `pages`). These files collectively define the site's structure, content, and configuration, and may include references to external collections. + +3. **Download Referenced Collections from Git:** + * If the Hjson metadata references external collections hosted in Git repositories, download these collections using a separate tool or crate responsible for Git interactions.
The Hjson files provide the necessary information (e.g., repository URLs, branch names) to perform these downloads. + +4. **Process Site Content and Collections with Doctree:** + * Utilize the `doctree` library to process the parsed site metadata and the content of any downloaded collections. + * `doctree` will build the document tree based on the Hjson structure and identify relevant assets such as pages (e.g., Markdown files) and images referenced within the site configuration or collections. + +5. **Export Assets to IPFS:** + * Export the identified assets (pages, images, etc.) to IPFS. + * For each exported asset, obtain its IPFS key (CID) and calculate its Blake hash for content integrity verification. + +6. **Generate `webmeta.json`:** + * Create a single `webmeta.json` file that consolidates all the necessary information for the browser-based generator. + * This file should include: + * Site-level metadata (from Hjson). + * Structure of the website (pages, navigation, etc.). + * For each page, include: + * Page metadata (from Hjson). + * The IPFS key of the page content. + * The Blake hash of the page content. + * Information about other assets (images, etc.), including their IPFS keys. + +7. **Optional: Upload `webmeta.json` to IPFS:** + * Optionally, upload the generated `webmeta.json` file to IPFS. + * If uploaded, the IPFS URL of the `webmeta.json` file is returned as the output of the web building process. + +8. **Utilize `webmeta.json` in Browser:** + * The generated `webmeta.json` file (either locally or fetched from IPFS) serves as the single configuration entry point for a browser-based website generator. + * The browser tool reads `webmeta.json`, uses the IPFS keys to fetch the content and assets from the IPFS network, and renders the website dynamically. The Blake hashes can be used to verify the integrity of the downloaded content. + +## `webmeta.json` Structure (Example) + +```json +{ + "site_metadata": { + // Consolidated data from site-level Hjson files (collection, header, footer, main, etc.) + "name": "demo1", + "title": "Demo Site 1", + "description": "This is a demo site for doctree", + "keywords": ["demo", "doctree", "example"], + "header": { ... }, + "footer": { ... } + }, + "pages": [ + { + "id": "mypages1", + "title": "My Pages 1", + "ipfs_key": "Qm...", // IPFS key of the page content + "blakehash": "sha256-...", // Blake hash of the page content + "sections": [ + { "type": "text", "content": "..." } // Potentially include some inline content or structure + ], + "assets": [ + { + "name": "image1.png", + "ipfs_key": "Qm..." // IPFS key of an image used on the page + } + ] + } + // Other pages... + ], + "assets": { + // Global assets not tied to a specific page, e.g., CSS, global images + "style.css": { + "ipfs_key": "Qm..." + } + } +} +``` + +This structure is a suggestion and can be adapted based on the specific needs of the browser-based generator. The key is to include all necessary information (metadata, IPFS keys, hashes) to allow the browser to fetch and render the complete website. 
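For illustration, here is a minimal sketch of the verification step a consumer of `webmeta.json` could perform, assuming the `blakehash` field holds the hex-encoded Blake3 digest of the raw page content and that `serde_json` is available alongside `serde`; the `WebMeta`/`PageMeta` structs and the commented-out fetch helper are illustrative, not part of the spec:

```rust
use serde::Deserialize;

/// Illustrative subset of webmeta.json; field names follow the example above.
#[derive(Deserialize)]
struct WebMeta {
    pages: Vec<PageMeta>,
}

#[derive(Deserialize)]
struct PageMeta {
    id: String,
    ipfs_key: String,
    blakehash: String,
}

/// Check fetched page bytes against the hash recorded in the manifest.
/// Assumes `blakehash` is the hex-encoded Blake3 digest of the plaintext content.
fn verify_page(page: &PageMeta, fetched: &[u8]) -> bool {
    page.blakehash == blake3::hash(fetched).to_hex().as_str()
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let manifest: WebMeta = serde_json::from_str(&std::fs::read_to_string("webmeta.json")?)?;
    for page in &manifest.pages {
        // Fetching the bytes behind `ipfs_key` (e.g. through an IPFS gateway) is omitted here.
        println!("page {} -> ipfs://{} (blake3 {})", page.id, page.ipfs_key, page.blakehash);
        // let content = fetch(&page.ipfs_key)?;   // hypothetical fetch helper
        // assert!(verify_page(page, &content));
    }
    Ok(())
}
```

Verifying against the recorded hash keeps content integrity independent of whichever gateway or node serves the bytes.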
\ No newline at end of file diff --git a/webbuilder/src/builder/webmeta.json b/webbuilder/src/builder/webmeta.json new file mode 100644 index 0000000..4373559 --- /dev/null +++ b/webbuilder/src/builder/webmeta.json @@ -0,0 +1,43 @@ +{ + "site_metadata": { + "name": "demo1", + "title": "Demo Site 1", + "description": "This is a demo site for doctree", + "keywords": ["demo", "doctree", "example"], + "header": { + "logo": "/images/logo.png", + "nav": [ + { "text": "Home", "url": "/" }, + { "text": "About", "url": "/about" } + ] + }, + "footer": { + "copyright": "© 2023 My Company", + "links": [ + { "text": "Privacy Policy", "url": "/privacy" } + ] + } + }, + "pages": [ + { + "id": "mypages1", + "title": "My Pages 1", + "ipfs_key": "QmPlaceholderIpfsKey1", + "blakehash": "sha256-PlaceholderBlakeHash1", + "sections": [ + { "type": "text", "content": "This is example content for My Pages 1." } + ], + "assets": [ + { + "name": "image1.png", + "ipfs_key": "QmPlaceholderImageIpfsKey1" + } + ] + } + ], + "assets": { + "style.css": { + "ipfs_key": "QmPlaceholderCssIpfsKey1" + } + } +} \ No newline at end of file
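The dependency list in `webbuilder/Cargo.toml` (blake3, chacha20poly1305, csv, ipfs-api) lines up with the asset-export step described in `specs.md`. Below is a minimal sketch of that step, assuming the Blake3 digest of the plaintext is used directly as the ChaCha20Poly1305 key and the nonce is prepended to the ciphertext; the manifest columns, the sample path, and the placeholder CID are illustrative, and the actual IPFS add call (for example `IpfsClient::add` from the ipfs-api crate) is only indicated in a comment:

```rust
use chacha20poly1305::{
    aead::{Aead, AeadCore, KeyInit, OsRng},
    ChaCha20Poly1305, Key,
};

/// Encrypt one asset with a key derived from its own Blake3 hash.
/// Returns the hex digest plus `nonce || ciphertext`, ready to be pushed to IPFS.
fn encrypt_asset(content: &[u8]) -> Result<(String, Vec<u8>), chacha20poly1305::Error> {
    // The 32-byte Blake3 digest of the plaintext doubles as the symmetric key.
    let hash = blake3::hash(content);
    let cipher = ChaCha20Poly1305::new(Key::from_slice(hash.as_bytes()));
    // Fresh random nonce per asset, prepended so a reader can split it off again.
    let nonce = ChaCha20Poly1305::generate_nonce(&mut OsRng);
    let ciphertext = cipher.encrypt(&nonce, content)?;
    let mut blob = nonce.to_vec();
    blob.extend_from_slice(&ciphertext);
    Ok((hash.to_hex().to_string(), blob))
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut manifest = csv::Writer::from_path("export_manifest.csv")?;
    manifest.write_record(["collection", "file", "blake3", "ipfs_key", "size"])?;

    // Sample input; the real exporter walks every file of a collection.
    let path = "examples/doctreenew/sites/demo1/collection.hjson";
    let content = std::fs::read(path)?;
    let (blake3_hex, encrypted) =
        encrypt_asset(&content).map_err(|e| format!("encrypt failed: {e}"))?;

    // The encrypted blob would be added to IPFS here and the returned CID recorded below;
    // a placeholder stands in for it.
    let ipfs_key = "Qm...";
    let size = content.len().to_string();
    manifest.write_record(["demo1", path, blake3_hex.as_str(), ipfs_key, size.as_str()])?;
    manifest.flush()?;

    let _ = encrypted; // handed to IPFS in the real exporter
    Ok(())
}
```

Because the key is the content's own Blake3 hash, anyone holding that hash (for example via the CSV manifest or `webmeta.json`) can decrypt the blob fetched from IPFS, while the IPFS network itself only ever stores ciphertext.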