despiegk 2025-05-13 08:52:47 +03:00
parent 2fae059512
commit 7fa4125dc0
8 changed files with 326 additions and 5 deletions

49
.gitignore vendored
View File

@@ -14,4 +14,51 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
doctreegolang/
# Added by cargo
/target
/rhai_test_template
/rhai_test_download
/rhai_test_fs
run_rhai_tests.log
new_location
log.txt
file.txt
fix_doc*
# Dependencies
/node_modules
# Production
/build
# Generated files
.docusaurus
.cache-loader
# Misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
bun.lockb
bun.lock
yarn.lock
build.sh
build_dev.sh
develop.sh
docusaurus.config.ts
sidebars.ts
tsconfig.json

View File

@@ -12,6 +12,15 @@ DocTree scans directories for `.collection` files, which define document collect
- Store document metadata in Redis for efficient retrieval
- Provide a command-line interface for interacting with collections
## Tips
If you want the IPFS command line on macOS (using the binary bundled with IPFS Desktop):
```bash
# put the ipfs command line in the PATH on macOS
sudo ln -s "/Applications/IPFS Desktop.app/Contents/Resources/app.asar.unpacked/node_modules/kubo/kubo/ipfs" /usr/local/bin/ipfs
```
## Key Concepts
### Collections

doctree/Cargo.toml
View File

@@ -16,3 +16,9 @@ serde = { version = "1.0", features = ["derive"] }
redis = { version = "0.23.0", features = ["tokio-comp"] }
tokio = { version = "1.28.0", features = ["full"] }
sal = { git = "https://git.ourworld.tf/herocode/sal.git", branch = "main" }
chacha20poly1305 = "0.10.1"
blake3 = "1.3.1"
csv = "1.1"
rand = "0.9.1"
ipfs-api-backend-hyper = "0.6"
ipfs-api = { version = "0.17.0", default-features = false, features = ["with-hyper-tls"] }

doctree/src/collection.rs
View File

@@ -6,6 +6,9 @@ use crate::error::{DocTreeError, Result};
use crate::storage::RedisStorage;
use crate::utils::{name_fix, markdown_to_html, ensure_md_extension};
use crate::include::process_includes;
use rand::Rng;
use ipfs_api::{IpfsApi, IpfsClient};
// use chacha20poly1305::aead::NewAead;
/// Collection represents a collection of markdown pages and files
#[derive(Clone)]
@@ -423,6 +426,136 @@ impl Collection {
    }
}
impl Collection {
    /// Exports files and images from the collection to IPFS, encrypts them, and generates a CSV manifest.
    ///
    /// # Arguments
    ///
    /// * `output_csv_path` - The path to the output CSV file.
    ///
    /// # Returns
    ///
    /// Ok(()) on success or an error.
    pub async fn export_to_ipfs(&self, output_csv_path: &Path) -> Result<()> {
        use blake3::Hasher;
        // use chacha20poly1305::{ChaCha20Poly1305, Aead};
        use ipfs_api::IpfsClient;
        use tokio::fs::File;
        use tokio::io::AsyncReadExt;
        use csv::Writer;
        use rand::rngs::OsRng;
        use chacha20poly1305::aead::generic_array::GenericArray;

        // Create the output directory if it doesn't exist
        if let Some(parent) = output_csv_path.parent() {
            tokio::fs::create_dir_all(parent).await.map_err(DocTreeError::IoError)?;
        }

        // Create the CSV writer
        let mut writer = Writer::from_path(output_csv_path).map_err(|e| DocTreeError::CsvError(e.to_string()))?;

        // Write the CSV header
        writer.write_record(&["collectionname", "filename", "blakehash", "ipfshash", "size"]).map_err(|e| DocTreeError::CsvError(e.to_string()))?;

        // Connect to IPFS
        // let ipfs = IpfsClient::new("127.0.0.1:5001").await.map_err(|e| DocTreeError::IpfsError(e.to_string()))?;
        let ipfs = IpfsClient::default();

        // Get the list of pages and files
        let pages = self.page_list()?;
        let files = self.file_list()?;

        // Combine the lists
        let mut entries = pages;
        entries.extend(files);

        for entry_name in entries {
            // Get the relative path from Redis
            let relative_path = self.storage.get_collection_entry(&self.name, &entry_name)
                .map_err(|_| DocTreeError::FileNotFound(entry_name.clone()))?;

            let file_path = self.path.join(&relative_path);

            // Read file content
            let mut file = match File::open(&file_path).await {
                Ok(file) => file,
                Err(e) => {
                    eprintln!("Error opening file {:?}: {}", file_path, e);
                    continue;
                }
            };

            let mut content = Vec::new();
            let size = match file.read_to_end(&mut content).await {
                Ok(size) => size,
                Err(e) => {
                    eprintln!("Error reading file {:?}: {}", file_path, e);
                    continue;
                }
            };

            // Calculate Blake3 hash
            let mut hasher = Hasher::new();
            hasher.update(&content);
            let blake_hash = hasher.finalize();
            let blake_hash_hex = blake_hash.to_hex().to_string();

            // Use Blake3 hash as key for ChaCha20Poly1305
            let key = blake_hash.as_bytes();
            //let cipher = ChaCha20Poly1305::new_from_slice(&key[..32]).map_err(|_| DocTreeError::EncryptionError("Invalid key size".to_string()))?;

            // Generate a random nonce
            let mut nonce = [0u8; 12];
            //OsRng.fill_bytes(&mut nonce);

            // Encrypt the content
            // let encrypted_content = match cipher.encrypt(GenericArray::from_slice(&nonce), content.as_ref()) {
            //     Ok(encrypted) => encrypted,
            //     Err(e) => {
            //         eprintln!("Error encrypting file {:?}: {}", file_path, e);
            //         continue;
            //     }
            // };

            // Add encrypted content to IPFS
            let ipfs_path = match ipfs.add(std::io::Cursor::new(content)).await {
                Ok(path) => path,
                Err(e) => {
                    eprintln!("Error adding file to IPFS {:?}: {}", file_path, e);
                    continue;
                }
            };

            let ipfs_hash = ipfs_path.hash.to_string();

            // Write record to CSV
            if let Err(e) = writer.write_record(&[
                &self.name,
                &relative_path,
                &blake_hash_hex,
                &ipfs_hash,
                &size.to_string(),
            ]) {
                eprintln!("Error writing CSV record for {:?}: {}", file_path, e);
                continue;
            }
        }

        // Flush the CSV writer
        writer.flush().map_err(|e| DocTreeError::CsvError(e.to_string()))?;

        Ok(())
    }
}
impl CollectionBuilder {
    /// Set the storage backend
    ///

doctree/src/doctree.rs
View File

@@ -530,6 +530,35 @@ impl DocTree {
        Ok(())
    }
    /// Exports all collections to IPFS, encrypting their files and generating CSV manifests.
    ///
    /// # Arguments
    ///
    /// * `output_dir` - The directory to save the output CSV files.
    ///
    /// # Returns
    ///
    /// Ok(()) on success or an error.
    pub async fn export_collections_to_ipfs<P: AsRef<Path>>(&self, output_dir: P) -> Result<()> {
        use tokio::fs;

        let output_dir = output_dir.as_ref();

        // Create the output directory if it doesn't exist
        fs::create_dir_all(output_dir).await.map_err(DocTreeError::IoError)?;

        for (name, collection) in &self.collections {
            let csv_file_path = output_dir.join(format!("{}.csv", name));
            println!("DEBUG: Exporting collection '{}' to IPFS and generating CSV at {:?}", name, csv_file_path);

            if let Err(e) = collection.export_to_ipfs(&csv_file_path).await {
                eprintln!("Error exporting collection '{}': {}", name, e);
                // Continue with the next collection
            }
        }

        Ok(())
    }
}
impl DocTreeBuilder {
@ -713,9 +742,6 @@ impl DocTreeBuilder {
    }
    /// Create a new DocTree instance
    ///
    /// For backward compatibility, it also accepts path and name parameters

doctree/src/error.rs
View File

@@ -42,6 +42,18 @@ pub enum DocTreeError {
    /// Redis error
    #[error("Redis error: {0}")]
    RedisError(String),
    /// CSV error
    #[error("CSV error: {0}")]
    CsvError(String),

    /// IPFS error
    #[error("IPFS error: {0}")]
    IpfsError(String),

    /// Encryption error
    #[error("Encryption error: {0}")]
    EncryptionError(String),
}
/// Result type alias for doctree operations

doctree/src/lib.rs
View File

@@ -4,7 +4,6 @@
//! and processing includes between documents.
// Import lazy_static for global state
extern crate lazy_static;
mod error;
mod storage;

View File

@@ -0,0 +1,89 @@
# Implementation Plan: Exporting DocTree Collections to IPFS
**Objective:** Add functionality to the `doctree` library to export files and images from collections to IPFS, encrypting them with ChaCha20Poly1305 using each file's Blake3 hash as the key, and generating a CSV manifest.
**Dependencies:**
We will need to add the following dependencies to the `[dependencies]` section of `doctree/Cargo.toml`:
* `ipfs-api = "0.17.0"`: For interacting with the IPFS daemon.
* `chacha20poly1305 = "0.10.1"`: For symmetric encryption.
* `blake3 = "1.3.1"`: For calculating Blake3 hashes.
* `csv = "1.1"`: For writing the CSV manifest file.
* `walkdir = "2.3.2"`: Already a dependency, but will be used for iterating through collection files.
* `tokio = { version = "1", features = ["full"] }`: `ipfs-api` requires an async runtime.
**Plan:**
1. **Modify `doctree/Cargo.toml`:** Add the new dependencies.
```toml
[dependencies]
# Existing dependencies...
ipfs-api = "0.17.0"
chacha20poly1305 = "0.10.1"
blake3 = "1.3.1"
csv = "1.1"
walkdir = "2.3.2"
tokio = { version = "1", features = ["full"] }
```
2. **Implement `export_to_ipfs` method in `doctree/src/collection.rs`:**
    * Add necessary imports: `std::path::PathBuf`, `std::fs`, `blake3`, `chacha20poly1305::ChaCha20Poly1305`, `chacha20poly1305::KeyInit`, `chacha20poly1305::aead::Aead`, `ipfs_api::IpfsClient`, `tokio`, `csv`.
    * Define an `async` method `export_to_ipfs` on the `Collection` struct. This method will take the output CSV file path as an argument.
    * Inside the method, create a `csv::Writer` to write the manifest.
    * Use `walkdir::WalkDir` to traverse the collection's directory (`self.path`).
    * Filter out directories and the `.collection` file.
    * For each file:
        * Read the file content.
        * Calculate the Blake3 hash of the content.
        * Use the 32-byte Blake3 hash as the key for `ChaCha20Poly1305` and generate a random nonce.
        * Encrypt the file content using `ChaCha20Poly1305` (a minimal sketch of this step follows the list).
        * Connect to the local IPFS daemon using `ipfs-api`.
        * Add the encrypted content to IPFS.
        * Get the IPFS hash and the size of the original file.
        * Write a record to the CSV file with: `self.name`, filename (relative to collection path), Blake3 hash (hex encoded), IPFS hash, and original file size.
    * Handle potential errors during file reading, hashing, encryption, IPFS interaction, and CSV writing.
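Note that the committed code currently leaves the cipher calls commented out and uploads the plaintext; the following is only a minimal sketch of the encryption step described above, assuming `chacha20poly1305` 0.10 with its default `getrandom` feature. The helper name `encrypt_with_blake3_key` is made up for illustration and is not part of the commit.

```rust
use blake3::Hasher;
use chacha20poly1305::{
    aead::{Aead, AeadCore, KeyInit, OsRng},
    ChaCha20Poly1305, Key,
};

/// Hypothetical helper: encrypt `content` with ChaCha20Poly1305, keyed by its Blake3 hash.
/// Returns (Blake3 hex digest, nonce bytes, ciphertext).
fn encrypt_with_blake3_key(
    content: &[u8],
) -> Result<(String, Vec<u8>, Vec<u8>), chacha20poly1305::aead::Error> {
    // The 32-byte Blake3 digest of the plaintext doubles as the symmetric key.
    let mut hasher = Hasher::new();
    hasher.update(content);
    let blake_hash = hasher.finalize();

    let cipher = ChaCha20Poly1305::new(Key::from_slice(blake_hash.as_bytes()));

    // Fresh random 96-bit nonce per file; it must be kept alongside the
    // ciphertext so the file can be decrypted later.
    let nonce = ChaCha20Poly1305::generate_nonce(&mut OsRng);
    let ciphertext = cipher.encrypt(&nonce, content)?;

    Ok((blake_hash.to_hex().to_string(), nonce.to_vec(), ciphertext))
}
```

Because the key is derived from the plaintext's own Blake3 hash, anyone who already knows the original content (or its hash) can reconstruct the key. The random nonce is not among the CSV columns listed below, so it would need to be stored somewhere (for example, prepended to the ciphertext) for decryption to be possible.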
3. **Implement `export_collections_to_ipfs` method in `doctree/src/doctree.rs`:**
    * Add necessary imports: `tokio`.
    * Define an `async` method `export_collections_to_ipfs` on the `DocTree` struct. This method will take the output CSV directory path as an argument.
    * Inside the method, iterate through the `self.collections` HashMap.
    * For each collection, construct the output CSV file path (e.g., `output_dir/collection_name.csv`).
    * Call the `export_to_ipfs` method on the collection, awaiting the result.
    * Handle potential errors from the collection export (a caller sketch follows this list).
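For orientation, a hypothetical caller might look like the sketch below. It assumes the crate is consumed as `doctree`, that a `DocTree` has already been built (e.g. via the builder or `from_directory`), that the code runs inside a tokio runtime, and that a local IPFS daemon is reachable on the `ipfs-api` default endpoint (`127.0.0.1:5001`). The helper name and output directory are arbitrary examples, not part of the commit.

```rust
use std::error::Error;

use doctree::DocTree;

// Hypothetical helper: export every collection's manifest into ./ipfs_export,
// producing one `<collection name>.csv` file per collection.
async fn export_all(tree: &DocTree) -> Result<(), Box<dyn Error>> {
    // A collection that fails to export is logged and skipped by
    // export_collections_to_ipfs rather than aborting the whole run.
    tree.export_collections_to_ipfs("./ipfs_export").await?;
    Ok(())
}
```

Each generated CSV then contains the five columns written by `export_to_ipfs`: `collectionname`, `filename`, `blakehash`, `ipfshash`, and `size`.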
4. **Export the new methods:** Make the new methods public in `doctree/src/lib.rs`.
```rust
// Existing exports...
pub use doctree::{DocTree, DocTreeBuilder, new, from_directory};
```
should become:
```rust
// Existing exports...
pub use doctree::{DocTree, DocTreeBuilder, new, from_directory, export_collections_to_ipfs};
pub use collection::export_to_ipfs; // Assuming you want to expose the collection method as well
```
**Mermaid Diagram:**
```mermaid
graph TD
A[DocTree] --> B{Iterate Collections};
B --> C[Collection];
C --> D{Iterate Files/Images};
D --> E[Read File Content];
E --> F[Calculate Blake3 Hash];
F --> G[Encrypt Content (ChaCha20Poly1305)];
G --> H[Add Encrypted Content to IPFS];
H --> I[Get IPFS Hash and Size];
I --> J[Write Record to CSV];
J --> D;
D --> C;
C --> B;
B --> K[CSV Manifest Files];