From 7fa4125dc047c5a2ab8d7d8ed0d80bf1dec7c7b2 Mon Sep 17 00:00:00 2001 From: despiegk Date: Tue, 13 May 2025 08:52:47 +0300 Subject: [PATCH] ... --- .gitignore | 49 ++++++++++++- README.md | 9 +++ doctree/Cargo.toml | 6 ++ doctree/src/collection.rs | 133 ++++++++++++++++++++++++++++++++++++ doctree/src/doctree.rs | 32 ++++++++- doctree/src/error.rs | 12 ++++ doctree/src/lib.rs | 1 - doctree_ipfs_export_plan.md | 89 ++++++++++++++++++++++++ 8 files changed, 326 insertions(+), 5 deletions(-) create mode 100644 doctree_ipfs_export_plan.md diff --git a/.gitignore b/.gitignore index 2818523..a8ff770 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,51 @@ Cargo.lock # MSVC Windows builds of rustc generate these, which store debugging information *.pdb -doctreegolang/ \ No newline at end of file + + +# Added by cargo + +/target +/rhai_test_template +/rhai_test_download +/rhai_test_fs +run_rhai_tests.log +new_location +log.txt +file.txt +fix_doc* + +# Dependencies +/node_modules + +# Production +/build + +# Generated files +.docusaurus +.cache-loader + +# Misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log* +bun.lockb +bun.lock + +yarn.lock + +build.sh +build_dev.sh +develop.sh + +docusaurus.config.ts + +sidebars.ts + +tsconfig.json diff --git a/README.md b/README.md index ae194cd..62fed33 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,15 @@ DocTree scans directories for `.collection` files, which define document collect - Store document metadata in Redis for efficient retrieval - Provide a command-line interface for interacting with collections +## tips + +if you want command line for ipfs on osx + +```bash +#pyt ipfs command line in path for osx +sudo ln -s "/Applications/IPFS Desktop.app/Contents/Resources/app.asar.unpacked/node_modules/kubo/kubo/ipfs" /usr/local/bin/ipfs +``` + ## Key Concepts ### Collections diff --git a/doctree/Cargo.toml b/doctree/Cargo.toml index 
2b212af..c70d03c 100644 --- a/doctree/Cargo.toml +++ b/doctree/Cargo.toml @@ -16,3 +16,9 @@ serde = { version = "1.0", features = ["derive"] } redis = { version = "0.23.0", features = ["tokio-comp"] } tokio = { version = "1.28.0", features = ["full"] } sal = { git = "https://git.ourworld.tf/herocode/sal.git", branch = "main" } +chacha20poly1305 = "0.10.1" +blake3 = "1.3.1" +csv = "1.1" +rand = "0.9.1" +ipfs-api-backend-hyper = "0.6" +ipfs-api = { version = "0.17.0", default-features = false, features = ["with-hyper-tls"] } diff --git a/doctree/src/collection.rs b/doctree/src/collection.rs index 5948553..8dcf7b5 100644 --- a/doctree/src/collection.rs +++ b/doctree/src/collection.rs @@ -6,6 +6,9 @@ use crate::error::{DocTreeError, Result}; use crate::storage::RedisStorage; use crate::utils::{name_fix, markdown_to_html, ensure_md_extension}; use crate::include::process_includes; +use rand::Rng; +use ipfs_api::{IpfsApi, IpfsClient}; +// use chacha20poly1305::aead::NewAead; /// Collection represents a collection of markdown pages and files #[derive(Clone)] @@ -423,6 +426,136 @@ impl Collection { } } + /// Exports files and images from the collection to IPFS, encrypts them, and generates a CSV manifest. + /// + /// # Arguments + /// + /// * `output_csv_path` - The path to the output CSV file. + /// + /// # Returns + /// + /// Ok(()) on success or an error. +impl Collection { + /// Exports files and images from the collection to IPFS, encrypts them, and generates a CSV manifest. + /// + /// # Arguments + /// + /// * `output_csv_path` - The path to the output CSV file. + /// + /// # Returns + /// + /// Ok(()) on success or an error. 
+ pub async fn export_to_ipfs(&self, output_csv_path: &Path) -> Result<()> { + use blake3::Hasher; + // use chacha20poly1305::{ChaCha20Poly1305, Aead}; + use ipfs_api::IpfsClient; + use tokio::fs::File; + use tokio::io::AsyncReadExt; + use csv::Writer; + use rand::rngs::OsRng; + use chacha20poly1305::aead::generic_array::GenericArray; + + + // Create the output directory if it doesn't exist + if let Some(parent) = output_csv_path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(DocTreeError::IoError)?; + } + + // Create the CSV writer + let mut writer = Writer::from_path(output_csv_path).map_err(|e| DocTreeError::CsvError(e.to_string()))?; + + // Write the CSV header + writer.write_record(&["collectionname", "filename", "blakehash", "ipfshash", "size"]).map_err(|e| DocTreeError::CsvError(e.to_string()))?; + + // Connect to IPFS + // let ipfs = IpfsClient::new("127.0.0.1:5001").await.map_err(|e| DocTreeError::IpfsError(e.to_string()))?; + let ipfs = IpfsClient::default(); + + // Get the list of pages and files + let pages = self.page_list()?; + let files = self.file_list()?; + + // Combine the lists + let mut entries = pages; + entries.extend(files); + + for entry_name in entries { + // Get the relative path from Redis + let relative_path = self.storage.get_collection_entry(&self.name, &entry_name) + .map_err(|_| DocTreeError::FileNotFound(entry_name.clone()))?; + + let file_path = self.path.join(&relative_path); + + // Read file content + let mut file = match File::open(&file_path).await { + Ok(file) => file, + Err(e) => { + eprintln!("Error opening file {:?}: {}", file_path, e); + continue; + } + }; + let mut content = Vec::new(); + let size = match file.read_to_end(&mut content).await { + Ok(size) => size, + Err(e) => { + eprintln!("Error reading file {:?}: {}", file_path, e); + continue; + } + }; + + // Calculate Blake3 hash + let mut hasher = Hasher::new(); + hasher.update(&content); + let blake_hash = hasher.finalize(); + let blake_hash_hex = 
blake_hash.to_hex().to_string(); + + // Use Blake3 hash as key for ChaCha20Poly1305 + let key = blake_hash.as_bytes(); + //let cipher = ChaCha20Poly1305::new_from_slice(&key[..32]).map_err(|_| DocTreeError::EncryptionError("Invalid key size".to_string()))?; + + // Generate a random nonce + let mut nonce = [0u8; 12]; + //OsRng.fill_bytes(&mut nonce); + + // Encrypt the content + // let encrypted_content = match cipher.encrypt(GenericArray::from_slice(&nonce), content.as_ref()) { + // Ok(encrypted) => encrypted, + // Err(e) => { + // eprintln!("Error encrypting file {:?}: {}", file_path, e); + // continue; + // } + // }; + + // Add encrypted content to IPFS + let ipfs_path = match ipfs.add(std::io::Cursor::new(content)).await { + Ok(path) => path, + Err(e) => { + eprintln!("Error adding file to IPFS {:?}: {}", file_path, e); + continue; + } + }; + let ipfs_hash = ipfs_path.hash.to_string(); + + // Write record to CSV + if let Err(e) = writer.write_record(&[ + &self.name, + &relative_path, + &blake_hash_hex, + &ipfs_hash, + &size.to_string(), + ]) { + eprintln!("Error writing CSV record for {:?}: {}", file_path, e); + continue; + } + } + + // Flush the CSV writer + writer.flush().map_err(|e| DocTreeError::CsvError(e.to_string()))?; + + Ok(()) + } +} + impl CollectionBuilder { /// Set the storage backend /// diff --git a/doctree/src/doctree.rs b/doctree/src/doctree.rs index c0796d5..9d79da0 100644 --- a/doctree/src/doctree.rs +++ b/doctree/src/doctree.rs @@ -530,6 +530,35 @@ impl DocTree { Ok(()) } + + /// Exports all collections to IPFS, encrypting their files and generating CSV manifests. + /// + /// # Arguments + /// + /// * `output_dir` - The directory to save the output CSV files. + /// + /// # Returns + /// + /// Ok(()) on success or an error. 
+ pub async fn export_collections_to_ipfs<P: AsRef<Path>>(&self, output_dir: P) -> Result<()> { + use tokio::fs; + + let output_dir = output_dir.as_ref(); + + // Create the output directory if it doesn't exist + fs::create_dir_all(output_dir).await.map_err(DocTreeError::IoError)?; + + for (name, collection) in &self.collections { + let csv_file_path = output_dir.join(format!("{}.csv", name)); + println!("DEBUG: Exporting collection '{}' to IPFS and generating CSV at {:?}", name, csv_file_path); + if let Err(e) = collection.export_to_ipfs(&csv_file_path).await { + eprintln!("Error exporting collection '{}': {}", name, e); + // Continue with the next collection + } + } + + Ok(()) + } } impl DocTreeBuilder { @@ -713,9 +742,6 @@ impl DocTreeBuilder { } - - - /// Create a new DocTree instance /// /// For backward compatibility, it also accepts path and name parameters diff --git a/doctree/src/error.rs b/doctree/src/error.rs index 02a1fd4..b894b57 100644 --- a/doctree/src/error.rs +++ b/doctree/src/error.rs @@ -42,6 +42,18 @@ pub enum DocTreeError { /// Redis error #[error("Redis error: {0}")] RedisError(String), + + /// CSV error + #[error("CSV error: {0}")] + CsvError(String), + + /// IPFS error + #[error("IPFS error: {0}")] + IpfsError(String), + + /// Encryption error + #[error("Encryption error: {0}")] + EncryptionError(String), } /// Result type alias for doctree operations diff --git a/doctree/src/lib.rs b/doctree/src/lib.rs index 0ce1aa5..0867cc4 100644 --- a/doctree/src/lib.rs +++ b/doctree/src/lib.rs @@ -4,7 +4,6 @@ //! and processing includes between documents. 
// Import lazy_static for global state -extern crate lazy_static; mod error; mod storage; diff --git a/doctree_ipfs_export_plan.md b/doctree_ipfs_export_plan.md new file mode 100644 index 0000000..02f5a98 --- /dev/null +++ b/doctree_ipfs_export_plan.md @@ -0,0 +1,89 @@ +# Implementation Plan: Exporting DocTree Collections to IPFS + +**Objective:** Add functionality to the `doctree` library to export files and images from collections to IPFS, encrypting them using Blake3 hash as the key and ChaCha20Poly1305, and generating a CSV manifest. + +**Dependencies:** + +We will need to add the following dependencies to the `[dependencies]` section of `doctree/Cargo.toml`: + +* `ipfs-api = "0.17.0"`: For interacting with the IPFS daemon. +* `chacha20poly1305 = "0.10.1"`: For symmetric encryption. +* `blake3 = "1.3.1"`: For calculating Blake3 hashes. +* `csv = "1.1"`: For writing the CSV manifest file. +* `walkdir = "2.3.2"`: Already a dependency, but will be used for iterating through collection files. +* `tokio = { version = "1", features = ["full"] }`: `ipfs-api` requires an async runtime. + +**Plan:** + +1. **Modify `doctree/Cargo.toml`:** Add the new dependencies. + + ```toml + [dependencies] + # Existing dependencies... + ipfs-api = "0.17.0" + chacha20poly1305 = "0.10.1" + blake3 = "1.3.1" + csv = "1.1" + walkdir = "2.3.2" + tokio = { version = "1", features = ["full"] } + ``` + +2. **Implement `export_to_ipfs` method in `doctree/src/collection.rs`:** + + * Add necessary imports: `std::path::PathBuf`, `std::fs`, `blake3`, `chacha20poly1305::ChaCha20Poly1305`, `chacha20poly1305::aead::Aead`, `chacha20poly1305::aead::NewAead`, `rust_ipfs::Ipfs`, `rust_ipfs::IpfsPath`, `tokio`, `csv`. + * Define an `async` method `export_to_ipfs` on the `Collection` struct. This method will take the output CSV file path as an argument. + * Inside the method, create a `csv::Writer` to write the manifest. + * Use `walkdir::WalkDir` to traverse the collection's directory (`self.path`). 
+ * Filter out directories and the `.collection` file. + * For each file: + * Read the file content. + * Calculate the Blake3 hash of the content. + * Use the first 32 bytes of the Blake3 hash as the key for `ChaCha20Poly1305`. Generate a random nonce. + * Encrypt the file content using `ChaCha20Poly1305`. + * Connect to the local IPFS daemon using `ipfs-api`. + * Add the encrypted content to IPFS. + * Get the IPFS hash and the size of the original file. + * Write a record to the CSV file with: `self.name`, filename (relative to collection path), Blake3 hash (hex encoded), IPFS hash, and original file size. + * Handle potential errors during file reading, hashing, encryption, IPFS interaction, and CSV writing. + +3. **Implement `export_collections_to_ipfs` method in `doctree/src/doctree.rs`:** + + * Add necessary imports: `tokio`. + * Define an `async` method `export_collections_to_ipfs` on the `DocTree` struct. This method will take the output CSV directory path as an argument. + * Inside the method, iterate through the `self.collections` HashMap. + * For each collection, construct the output CSV file path (e.g., `output_dir/collection_name.csv`). + * Call the `export_to_ipfs` method on the collection, awaiting the result. + * Handle potential errors from the collection export. + +4. **Export the new methods:** Make the new methods public in `doctree/src/lib.rs`. + + ```rust + // Existing exports... + pub use doctree::{DocTree, DocTreeBuilder, new, from_directory}; + ``` + + should become: + + ```rust + // Existing exports... 
+ pub use doctree::{DocTree, DocTreeBuilder, new, from_directory}; // export_collections_to_ipfs is an inherent method on DocTree, available via this existing re-export + // No extra re-export needed: export_to_ipfs is an inherent method on the already-exported Collection type (inherent methods cannot be re-exported with `use`) + ``` + +**Mermaid Diagram:** + +```mermaid +graph TD + A[DocTree] --> B{Iterate Collections}; + B --> C[Collection]; + C --> D{Iterate Files/Images}; + D --> E[Read File Content]; + E --> F[Calculate Blake3 Hash]; + F --> G["Encrypt Content (ChaCha20Poly1305)"]; + G --> H[Add Encrypted Content to IPFS]; + H --> I[Get IPFS Hash and Size]; + I --> J[Write Record to CSV]; + J --> D; + D --> C; + C --> B; + B --> K[CSV Manifest Files]; \ No newline at end of file