Commit 7fa4125dc0 (parent 2fae059512)

**.gitignore** (vendored, 49 changed lines)
```diff
@@ -14,4 +14,51 @@ Cargo.lock
 # MSVC Windows builds of rustc generate these, which store debugging information
 *.pdb
+
+doctreegolang/
+
+# Added by cargo
+/target
+/rhai_test_template
+/rhai_test_download
+/rhai_test_fs
+run_rhai_tests.log
+new_location
+log.txt
+file.txt
+fix_doc*
+
+# Dependencies
+/node_modules
+
+# Production
+/build
+
+# Generated files
+.docusaurus
+.cache-loader
+
+# Misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+bun.lockb
+bun.lock
+
+yarn.lock
+
+build.sh
+build_dev.sh
+develop.sh
+
+docusaurus.config.ts
+
+sidebars.ts
+
+tsconfig.json
```
**README.md**

@@ -12,6 +12,15 @@ DocTree scans directories for `.collection` files, which define document collections — a Tips section is added after the feature list:

- Store document metadata in Redis for efficient retrieval
- Provide a command-line interface for interacting with collections

## Tips

To get the `ipfs` command line on macOS when IPFS Desktop is installed:

```bash
# Put the ipfs CLI bundled with IPFS Desktop on the PATH (macOS)
sudo ln -s "/Applications/IPFS Desktop.app/Contents/Resources/app.asar.unpacked/node_modules/kubo/kubo/ipfs" /usr/local/bin/ipfs
```
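Assuming the symlink above succeeded, the CLI should resolve in a new shell; a quick check (illustrative, not part of the commit):

```bash
# Confirm the binary resolves and runs
which ipfs
ipfs --version
```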
## Key Concepts

### Collections
**doctree/Cargo.toml**

```diff
@@ -16,3 +16,9 @@ serde = { version = "1.0", features = ["derive"] }
 redis = { version = "0.23.0", features = ["tokio-comp"] }
 tokio = { version = "1.28.0", features = ["full"] }
 sal = { git = "https://git.ourworld.tf/herocode/sal.git", branch = "main" }
+chacha20poly1305 = "0.10.1"
+blake3 = "1.3.1"
+csv = "1.1"
+rand = "0.9.1"
+ipfs-api-backend-hyper = "0.6"
+ipfs-api = { version = "0.17.0", default-features = false, features = ["with-hyper-tls"] }
```
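Pairing `ipfs-api` (default features off) with `ipfs-api-backend-hyper` selects the hyper transport. A minimal sketch of constructing a client against the local daemon, assuming its API listens on the default port 5001:

```rust
use ipfs_api::IpfsClient;

fn make_client() -> IpfsClient {
    // Default construction targets the local daemon API at 127.0.0.1:5001.
    IpfsClient::default()
}
```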
**doctree/src/collection.rs**

```diff
@@ -6,6 +6,9 @@ use crate::error::{DocTreeError, Result};
 use crate::storage::RedisStorage;
 use crate::utils::{name_fix, markdown_to_html, ensure_md_extension};
 use crate::include::process_includes;
+use rand::Rng;
+use ipfs_api::{IpfsApi, IpfsClient};
+// use chacha20poly1305::aead::NewAead;
 
 /// Collection represents a collection of markdown pages and files
 #[derive(Clone)]
```
@@ -423,6 +426,136 @@ impl Collection {

```rust
    }
}

impl Collection {
    /// Exports files and images from the collection to IPFS and generates a CSV manifest.
    /// (Encryption with ChaCha20Poly1305, keyed by each file's Blake3 hash, is planned;
    /// the encryption steps below are currently commented out, so content is uploaded as-is.)
    ///
    /// # Arguments
    ///
    /// * `output_csv_path` - The path to the output CSV file.
    ///
    /// # Returns
    ///
    /// Ok(()) on success or an error.
    pub async fn export_to_ipfs(&self, output_csv_path: &Path) -> Result<()> {
        use blake3::Hasher;
        // use chacha20poly1305::{ChaCha20Poly1305, Aead};
        use ipfs_api::IpfsClient;
        use tokio::fs::File;
        use tokio::io::AsyncReadExt;
        use csv::Writer;
        use rand::rngs::OsRng;
        use chacha20poly1305::aead::generic_array::GenericArray;

        // Create the output directory if it doesn't exist
        if let Some(parent) = output_csv_path.parent() {
            tokio::fs::create_dir_all(parent).await.map_err(DocTreeError::IoError)?;
        }

        // Create the CSV writer and write the header row
        let mut writer = Writer::from_path(output_csv_path)
            .map_err(|e| DocTreeError::CsvError(e.to_string()))?;
        writer
            .write_record(&["collectionname", "filename", "blakehash", "ipfshash", "size"])
            .map_err(|e| DocTreeError::CsvError(e.to_string()))?;

        // Connect to the local IPFS daemon (the default client targets 127.0.0.1:5001)
        // let ipfs = IpfsClient::new("127.0.0.1:5001").await.map_err(|e| DocTreeError::IpfsError(e.to_string()))?;
        let ipfs = IpfsClient::default();

        // Gather all pages and files in the collection
        let pages = self.page_list()?;
        let files = self.file_list()?;
        let mut entries = pages;
        entries.extend(files);

        for entry_name in entries {
            // Resolve the entry's relative path via Redis
            let relative_path = self
                .storage
                .get_collection_entry(&self.name, &entry_name)
                .map_err(|_| DocTreeError::FileNotFound(entry_name.clone()))?;
            let file_path = self.path.join(&relative_path);

            // Read the file content, skipping entries that cannot be opened or read
            let mut file = match File::open(&file_path).await {
                Ok(file) => file,
                Err(e) => {
                    eprintln!("Error opening file {:?}: {}", file_path, e);
                    continue;
                }
            };
            let mut content = Vec::new();
            let size = match file.read_to_end(&mut content).await {
                Ok(size) => size,
                Err(e) => {
                    eprintln!("Error reading file {:?}: {}", file_path, e);
                    continue;
                }
            };

            // Calculate the Blake3 hash of the content
            let mut hasher = Hasher::new();
            hasher.update(&content);
            let blake_hash = hasher.finalize();
            let blake_hash_hex = blake_hash.to_hex().to_string();

            // The Blake3 hash doubles as the ChaCha20Poly1305 key (encryption stubbed out)
            let key = blake_hash.as_bytes();
            // let cipher = ChaCha20Poly1305::new_from_slice(&key[..32])
            //     .map_err(|_| DocTreeError::EncryptionError("Invalid key size".to_string()))?;

            // Generate a random nonce (stubbed out alongside the encryption)
            let mut nonce = [0u8; 12];
            // OsRng.fill_bytes(&mut nonce);

            // Encrypt the content (stubbed out)
            // let encrypted_content = match cipher.encrypt(GenericArray::from_slice(&nonce), content.as_ref()) {
            //     Ok(encrypted) => encrypted,
            //     Err(e) => {
            //         eprintln!("Error encrypting file {:?}: {}", file_path, e);
            //         continue;
            //     }
            // };

            // Add the (currently unencrypted) content to IPFS
            let ipfs_path = match ipfs.add(std::io::Cursor::new(content)).await {
                Ok(path) => path,
                Err(e) => {
                    eprintln!("Error adding file to IPFS {:?}: {}", file_path, e);
                    continue;
                }
            };
            let ipfs_hash = ipfs_path.hash.to_string();

            // Write the manifest record for this entry
            if let Err(e) = writer.write_record(&[
                &self.name,
                &relative_path,
                &blake_hash_hex,
                &ipfs_hash,
                &size.to_string(),
            ]) {
                eprintln!("Error writing CSV record for {:?}: {}", file_path, e);
                continue;
            }
        }

        // Flush the CSV writer so all records hit disk
        writer.flush().map_err(|e| DocTreeError::CsvError(e.to_string()))?;

        Ok(())
    }
}

impl CollectionBuilder {
    /// Set the storage backend
    ///
```
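For reference, a minimal sketch of how the stubbed-out encryption step could be completed under the crate versions in `Cargo.toml` (chacha20poly1305 0.10, where `KeyInit` replaces the older `NewAead` trait). The helper name `encrypt_with_blake3_key` and the nonce-prefixing layout are illustrative assumptions, not part of the commit:

```rust
use chacha20poly1305::{
    aead::{Aead, AeadCore, KeyInit, OsRng},
    ChaCha20Poly1305,
};

/// Hypothetical helper: encrypt `content` with ChaCha20Poly1305,
/// using the content's own 32-byte Blake3 hash as the key.
fn encrypt_with_blake3_key(content: &[u8]) -> Result<Vec<u8>, String> {
    let blake_hash = blake3::hash(content);
    let cipher = ChaCha20Poly1305::new_from_slice(blake_hash.as_bytes())
        .map_err(|_| "invalid key size".to_string())?;

    // A fresh random 96-bit nonce per file; it must be kept (here it is
    // prepended to the ciphertext) or decryption becomes impossible.
    let nonce = ChaCha20Poly1305::generate_nonce(&mut OsRng);
    let ciphertext = cipher
        .encrypt(&nonce, content)
        .map_err(|e| format!("encryption failed: {e}"))?;

    let mut out = nonce.to_vec();
    out.extend_from_slice(&ciphertext);
    Ok(out)
}
```

Deriving the key from the plaintext's own hash is convergent encryption: anyone who already holds the file can derive the key. That is deduplication-friendly, which appears to be the intent here, but it offers no secrecy against someone who can guess the content.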
**doctree/src/doctree.rs**

@@ -530,6 +530,35 @@ impl DocTree {

```rust
        Ok(())
    }

    /// Exports all collections to IPFS, generating one CSV manifest per collection.
    /// (File encryption happens in `Collection::export_to_ipfs`, where it is
    /// currently stubbed out.)
    ///
    /// # Arguments
    ///
    /// * `output_dir` - The directory to save the output CSV files.
    ///
    /// # Returns
    ///
    /// Ok(()) on success or an error.
    pub async fn export_collections_to_ipfs<P: AsRef<Path>>(&self, output_dir: P) -> Result<()> {
        use tokio::fs;

        let output_dir = output_dir.as_ref();

        // Create the output directory if it doesn't exist
        fs::create_dir_all(output_dir).await.map_err(DocTreeError::IoError)?;

        for (name, collection) in &self.collections {
            let csv_file_path = output_dir.join(format!("{}.csv", name));
            println!("DEBUG: Exporting collection '{}' to IPFS and generating CSV at {:?}", name, csv_file_path);
            if let Err(e) = collection.export_to_ipfs(&csv_file_path).await {
                // Log the failure and continue with the next collection
                eprintln!("Error exporting collection '{}': {}", name, e);
            }
        }

        Ok(())
    }
}

impl DocTreeBuilder {
```
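A usage sketch, assuming a Tokio runtime and a `DocTree` built elsewhere; the output directory name is illustrative:

```rust
use doctree::DocTree;

// Hypothetical call site: writes one CSV manifest per collection under ipfs_exports/.
async fn export_all(tree: &DocTree) -> Result<(), Box<dyn std::error::Error>> {
    tree.export_collections_to_ipfs("ipfs_exports").await?;
    Ok(())
}
```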
```diff
@@ -713,9 +742,6 @@ impl DocTreeBuilder {
     }
 
-
-
-
 
     /// Create a new DocTree instance
     ///
     /// For backward compatibility, it also accepts path and name parameters
```
**doctree/src/error.rs**

```diff
@@ -42,6 +42,18 @@ pub enum DocTreeError {
     /// Redis error
     #[error("Redis error: {0}")]
     RedisError(String),
+
+    /// CSV error
+    #[error("CSV error: {0}")]
+    CsvError(String),
+
+    /// IPFS error
+    #[error("IPFS error: {0}")]
+    IpfsError(String),
+
+    /// Encryption error
+    #[error("Encryption error: {0}")]
+    EncryptionError(String),
 }
 
 /// Result type alias for doctree operations
```
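Of the new variants, `IpfsError` and `EncryptionError` only appear in commented-out code in this commit. A sketch of the intended mapping pattern, mirroring how `CsvError` is already used (the helper is hypothetical):

```rust
use crate::error::{DocTreeError, Result};
use ipfs_api::{IpfsApi, IpfsClient};

// Hypothetical helper: add raw bytes to IPFS, mapping transport failures
// into the new DocTreeError::IpfsError variant.
async fn add_to_ipfs(ipfs: &IpfsClient, bytes: Vec<u8>) -> Result<String> {
    let res = ipfs
        .add(std::io::Cursor::new(bytes))
        .await
        .map_err(|e| DocTreeError::IpfsError(e.to_string()))?;
    Ok(res.hash)
}
```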
**doctree/src/lib.rs**

```diff
@@ -4,7 +4,6 @@
 //! and processing includes between documents.
 
 // Import lazy_static for global state
-extern crate lazy_static;
 
 mod error;
 mod storage;
```
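Dropping the `extern crate` declaration is harmless on the 2018+ editions, where the macro is imported directly at its use sites. A sketch with a hypothetical global, purely for illustration:

```rust
use lazy_static::lazy_static;

lazy_static! {
    // Hypothetical example global; not part of the crate.
    static ref GREETING: String = format!("doctree {}", env!("CARGO_PKG_VERSION"));
}
```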
**doctree_ipfs_export_plan.md** (new file, +89 lines)
# Implementation Plan: Exporting DocTree Collections to IPFS

**Objective:** Add functionality to the `doctree` library to export files and images from collections to IPFS, encrypting them with ChaCha20Poly1305 keyed by each file's Blake3 hash, and generating a CSV manifest.

**Dependencies:**

We will need to add the following dependencies to the `[dependencies]` section of `doctree/Cargo.toml`:

* `ipfs-api = "0.17.0"`: for interacting with the IPFS daemon.
* `chacha20poly1305 = "0.10.1"`: for symmetric encryption.
* `blake3 = "1.3.1"`: for calculating Blake3 hashes.
* `csv = "1.1"`: for writing the CSV manifest file.
* `walkdir = "2.3.2"`: already a dependency; used for iterating through collection files.
* `tokio = { version = "1", features = ["full"] }`: `ipfs-api` requires an async runtime.

**Plan:**

1. **Modify `doctree/Cargo.toml`:** Add the new dependencies.

   ```toml
   [dependencies]
   # Existing dependencies...
   ipfs-api = "0.17.0"
   chacha20poly1305 = "0.10.1"
   blake3 = "1.3.1"
   csv = "1.1"
   walkdir = "2.3.2"
   tokio = { version = "1", features = ["full"] }
   ```
2. **Implement `export_to_ipfs` in `doctree/src/collection.rs`:**

   * Add the necessary imports: `std::path::PathBuf`, `std::fs`, `blake3`, `chacha20poly1305::ChaCha20Poly1305`, `chacha20poly1305::aead::Aead`, `chacha20poly1305::KeyInit` (the 0.10 replacement for the older `NewAead` trait), `ipfs_api::{IpfsApi, IpfsClient}`, `tokio`, `csv`.
   * Define an `async` method `export_to_ipfs` on the `Collection` struct that takes the output CSV file path as an argument.
   * Inside the method, create a `csv::Writer` for the manifest.
   * Use `walkdir::WalkDir` to traverse the collection's directory (`self.path`).
   * Filter out directories and the `.collection` file.
   * For each file:
     * Read the file content.
     * Calculate the Blake3 hash of the content.
     * Use the 32-byte Blake3 hash as the key for `ChaCha20Poly1305` and generate a random nonce.
     * Encrypt the file content using `ChaCha20Poly1305`.
     * Connect to the local IPFS daemon using `ipfs-api`.
     * Add the encrypted content to IPFS.
     * Get the IPFS hash and the size of the original file.
     * Write a record to the CSV file with `self.name`, the filename (relative to the collection path), the hex-encoded Blake3 hash, the IPFS hash, and the original file size; a sample row is shown after this list.
   * Handle potential errors during file reading, hashing, encryption, IPFS interaction, and CSV writing.
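   For concreteness, a sample manifest row under this column layout (values are illustrative, not from a real run):

   ```csv
   collectionname,filename,blakehash,ipfshash,size
   docs,images/logo.png,2f6a3d9e…,QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG,10240
   ```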
3. **Implement `export_collections_to_ipfs` in `doctree/src/doctree.rs`:**

   * Add the necessary import: `tokio`.
   * Define an `async` method `export_collections_to_ipfs` on the `DocTree` struct that takes the output CSV directory path as an argument.
   * Inside the method, iterate over the `self.collections` HashMap.
   * For each collection, construct the output CSV file path (e.g., `output_dir/collection_name.csv`).
   * Call `export_to_ipfs` on the collection, awaiting the result.
   * Handle potential errors from the collection export.
4. **Export the new functionality:** Both new functions are inherent `async` methods, so they travel with their types; `doctree/src/lib.rs` only needs to keep re-exporting those types (methods themselves cannot appear in a `pub use`).

   ```rust
   // Existing exports...
   pub use doctree::{DocTree, DocTreeBuilder, new, from_directory};
   ```

   should become:

   ```rust
   // Existing exports...
   pub use doctree::{DocTree, DocTreeBuilder, new, from_directory};
   pub use collection::Collection; // exposes Collection::export_to_ipfs along with the type
   ```
**Mermaid Diagram:**

```mermaid
graph TD
    A[DocTree] --> B{Iterate Collections};
    B --> C[Collection];
    C --> D{Iterate Files/Images};
    D --> E[Read File Content];
    E --> F[Calculate Blake3 Hash];
    F --> G["Encrypt Content (ChaCha20Poly1305)"];
    G --> H[Add Encrypted Content to IPFS];
    H --> I[Get IPFS Hash and Size];
    I --> J[Write Record to CSV];
    J --> D;
    D --> C;
    C --> B;
    B --> K[CSV Manifest Files];
```