2025-02-24 06:34:38 -07:00
parent 6a2e143b98
commit fff14183a4
15 changed files with 341 additions and 12 deletions


@@ -23,14 +23,3 @@ model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/c
// report := model.new_report(
// name: 'example_report'
// title: 'Example Business Model'
// )!
// report.export(
// path: build_path
// overwrite: true
// format: .docusaurus
// )!


@@ -0,0 +1,37 @@
#!/usr/bin/env -S v -n -w -cg -gc none -no-retry-compilation -cc tcc -d use_openssl -enable-globals run
//#!/usr/bin/env -S v -cg -enable-globals run
import freeflowuniverse.herolib.biz.bizmodel
import freeflowuniverse.herolib.core.playbook
import freeflowuniverse.herolib.core.playcmds
import os
//TODO: need to fix wrong location
const playbook_path = os.dir(@FILE) + '/playbook'
const build_path = os.join_path(os.dir(@FILE), '/docusaurus')
buildpath := '${os.home_dir()}/hero/var/mdbuild/bizmodel'
mut model := bizmodel.getset("example")!
model.workdir = build_path
model.play(mut playbook.new(path: playbook_path)!)!
println(model.sheet)
println(model.sheet.export()!)
// model.sheet.export(path:"~/Downloads/test.csv")!
// model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/content/test.csv")!
report := model.new_report(
    name: 'example_report'
    title: 'Example Business Model'
)!
report.export(
    path: build_path
    overwrite: true
    format: .docusaurus
)!


@@ -6,6 +6,8 @@ This company is a cloud company ...
- name, e.g. for a specific project
- descr, description of the revenue line item
- revenue_items: one-off revenue, is not extrapolated
- revenue_growth: a revenue stream which is extrapolated over time
- revenue_setup, setup revenue for 1 item, e.g. '1000usd'
- revenue_setup_delay
- revenue_monthly, revenue per month for 1 item
@@ -25,7 +27,7 @@ This company is a cloud company ...
```js
!!bizmodel.revenue_define bizname:'test'
descr:'OEM Deals'
revenue_setup:'10:1000000EUR,15:3333,20:1200000'
revenue_items:'10:1000000EUR,15:3333,20:1200000'
cogs_setup_perc: '1:5%,20:10%'
!!bizmodel.revenue_define bizname:'test'


@@ -0,0 +1,94 @@
# DedupeStore
DedupeStore is a content-addressable key-value store with built-in deduplication. It uses blake2b-160 content hashing to identify and deduplicate data, making it ideal for storing files or data blocks where the same content might appear multiple times.
## Features
- Content-based deduplication using blake2b-160 hashing
- Efficient storage using RadixTree for hash lookups
- Persistent storage using OurDB
- Maximum value size limit of 1MB
- Fast retrieval of data using content hash
- Automatic deduplication of identical content
## Usage
```v
import freeflowuniverse.herolib.data.dedupestor
fn main() ! {
    // Create a new dedupestore
    mut ds := dedupestor.new(
        path: 'path/to/store'
        reset: false // Set to true to reset existing data
    )!

    // Store some data
    data := 'Hello, World!'.bytes()
    hash := ds.store(data)!
    println('Stored data with hash: ${hash}')

    // Retrieve data using hash
    retrieved := ds.get(hash)!
    println('Retrieved data: ${retrieved.bytestr()}')

    // Check if data exists
    exists := ds.exists(hash)
    println('Data exists: ${exists}')

    // Attempting to store the same data again returns the same hash
    same_hash := ds.store(data)!
    assert hash == same_hash // True, data was deduplicated
}
```
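Because the key is derived from the content itself, the returned hash is the only handle you need to keep: it can be used to fetch the same data later against the same store path. A minimal sketch of that pattern (the helper function names are illustrative, only the `dedupestor` calls come from the API above):

```v
import freeflowuniverse.herolib.data.dedupestor

// store a payload once and hand back its content hash
fn store_once(path string, content string) !string {
    mut ds := dedupestor.new(path: path)!
    return ds.store(content.bytes())!
}

// later, possibly from another process: reopen the same path and fetch by hash
fn fetch_later(path string, hash string) !string {
    mut ds := dedupestor.new(path: path)!
    return ds.get(hash)!.bytestr()
}
```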
## Implementation Details
DedupeStore uses two main components for storage:
1. **RadixTree**: Stores mappings from content hashes to data location IDs
2. **OurDB**: Stores the actual data blocks
When storing data:
1. The data is hashed using blake2b-160
2. If the hash exists in the RadixTree, the existing data location is returned
3. If the hash is new:
- Data is stored in OurDB, getting a new location ID
- Hash -> ID mapping is stored in RadixTree
- The hash is returned
When retrieving data:
1. The RadixTree is queried with the hash to get the data location ID
2. The data is retrieved from OurDB using the ID
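For illustration, the retrieval flow above can be written out directly against the two components. This is a sketch, not the module's source: the `search`/`get` calls mirror how the module uses them, but the standalone function and its parameter types are assumptions.

```v
import freeflowuniverse.herolib.data.radixtree
import freeflowuniverse.herolib.data.ourdb

// sketch: resolve a content hash to its stored bytes
fn get_by_hash(mut radix radixtree.RadixTree, mut db ourdb.OurDB, hash string) ![]u8 {
    // 1. the RadixTree maps the hash to a 4-byte location id
    id_bytes := radix.search(hash)!
    // 2. decode the little-endian u32 id
    id := u32(id_bytes[0]) | (u32(id_bytes[1]) << 8) | (u32(id_bytes[2]) << 16) | (u32(id_bytes[3]) << 24)
    // 3. OurDB returns the data block stored under that id
    return db.get(id)!
}
```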
## Size Limits
- Maximum value size: 1MB
- Attempting to store larger values will result in an error
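Callers can also check payloads against the public `max_value_size` constant up front instead of relying on the error; a small sketch:

```v
import freeflowuniverse.herolib.data.dedupestor

// reject oversized payloads before they ever reach store()
fn fits_in_store(data []u8) bool {
    return data.len <= dedupestor.max_value_size
}
```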
## Error Handling
The store methods return results that should be handled with V's error handling:
```v
// Handle potential errors
if hash := ds.store(large_data) {
    // Success
    println('Stored with hash: ${hash}')
} else {
    // Error occurred
    println('Error: ${err}')
}
```
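The same call can also be handled with an `or` block, which is the other common pattern in V code; a brief sketch (the error message and exit path are just examples):

```v
hash := ds.store(large_data) or {
    eprintln('store failed: ${err}')
    exit(1)
}
println('Stored with hash: ${hash}')
```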
## Testing
The module includes comprehensive tests covering:
- Basic store/retrieve operations
- Deduplication functionality
- Size limit enforcement
- Edge cases
Run tests with:
```bash
v test lib/data/dedupestor/
```


@@ -0,0 +1,99 @@
module dedupestor
import crypto.blake2b
import freeflowuniverse.herolib.data.radixtree
import freeflowuniverse.herolib.data.ourdb
pub const max_value_size = 1024 * 1024 // 1MB
// DedupeStore provides a key-value store with deduplication based on content hashing
pub struct DedupeStore {
mut:
    radix &radixtree.RadixTree // For storing hash -> id mappings
    data  &ourdb.OurDB         // For storing the actual data
}

@[params]
pub struct NewArgs {
pub mut:
    path  string // Base path for the store
    reset bool   // Whether to reset existing data
}
// new creates a new deduplication store
pub fn new(args NewArgs) !&DedupeStore {
    // Create the radixtree for hash -> id mapping
    mut rt := radixtree.new(
        path: '${args.path}/radixtree'
        reset: args.reset
    )!

    // Create the ourdb for actual data storage
    mut db := ourdb.new(
        path: '${args.path}/data'
        record_size_max: max_value_size
        incremental_mode: true // We want auto-incrementing IDs
        reset: args.reset
    )!

    return &DedupeStore{
        radix: rt
        data: db
    }
}
// store stores a value and returns its hash
// If the value already exists (same hash), returns the existing hash without storing again
pub fn (mut ds DedupeStore) store(value []u8) !string {
    // Check size limit
    if value.len > max_value_size {
        return error('value size exceeds maximum allowed size of 1MB')
    }

    // Calculate the blake2b-160 hash of the value
    hash := blake2b.sum160(value).hex()

    // Check if this hash already exists
    if _ := ds.radix.search(hash) {
        // Value already exists, return the hash
        return hash
    }

    // Store the actual data in ourdb
    id := ds.data.set(data: value)!

    // Convert id to bytes for storage in radixtree
    id_bytes := u32_to_bytes(id)

    // Store the mapping of hash -> id in radixtree
    ds.radix.insert(hash, id_bytes)!

    return hash
}
// get retrieves a value by its hash
pub fn (mut ds DedupeStore) get(hash string) ![]u8 {
    // Get the ID from radixtree
    id_bytes := ds.radix.search(hash)!

    // Convert bytes back to u32 id
    id := bytes_to_u32(id_bytes)

    // Get the actual data from ourdb
    return ds.data.get(id)!
}
// exists checks if a value with the given hash exists
pub fn (mut ds DedupeStore) exists(hash string) bool {
    return if _ := ds.radix.search(hash) { true } else { false }
}
// Helper function to convert u32 to []u8
fn u32_to_bytes(n u32) []u8 {
    return [u8(n), u8(n >> 8), u8(n >> 16), u8(n >> 24)]
}
// Helper function to convert []u8 to u32
fn bytes_to_u32(b []u8) u32 {
    return u32(b[0]) | (u32(b[1]) << 8) | (u32(b[2]) << 16) | (u32(b[3]) << 24)
}
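The two byte helpers are little-endian inverses of each other; a quick test-style round-trip check (illustrative only, not part of the module, but it could live in a test file alongside it):

```v
fn test_u32_bytes_roundtrip() {
    id := u32(0x12345678)
    assert u32_to_bytes(id) == [u8(0x78), 0x56, 0x34, 0x12]
    assert bytes_to_u32(u32_to_bytes(id)) == id
}
```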


@@ -0,0 +1,108 @@
module dedupestor
import os
fn testsuite_begin() ! {
    // Ensure test directories exist and are clean
    test_dirs := [
        '/tmp/dedupestor_test',
        '/tmp/dedupestor_test_size',
        '/tmp/dedupestor_test_exists',
        '/tmp/dedupestor_test_multiple',
    ]
    for dir in test_dirs {
        if os.exists(dir) {
            os.rmdir_all(dir) or {}
        }
        os.mkdir_all(dir) or {}
    }
}
fn test_basic_operations() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test'
        reset: true
    )!

    // Test storing and retrieving data
    value1 := 'test data 1'.bytes()
    hash1 := ds.store(value1)!
    retrieved1 := ds.get(hash1)!
    assert retrieved1 == value1

    // Test deduplication
    hash2 := ds.store(value1)!
    assert hash1 == hash2 // Should return same hash for same data

    // Test different data gets different hash
    value2 := 'test data 2'.bytes()
    hash3 := ds.store(value2)!
    assert hash1 != hash3 // Should be different hash for different data
    retrieved2 := ds.get(hash3)!
    assert retrieved2 == value2
}
fn test_size_limit() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_size'
        reset: true
    )!

    // Test data under size limit (1KB)
    small_data := []u8{len: 1024, init: u8(index)}
    small_hash := ds.store(small_data)!
    retrieved := ds.get(small_hash)!
    assert retrieved == small_data

    // Test data over size limit (2MB)
    large_data := []u8{len: 2 * 1024 * 1024, init: u8(index)}
    if _ := ds.store(large_data) {
        assert false, 'Expected error for data exceeding size limit'
    }
}
fn test_exists() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_exists'
        reset: true
    )!

    value := 'test data'.bytes()
    hash := ds.store(value)!

    assert ds.exists(hash) == true
    assert ds.exists('nonexistent') == false
}
fn test_multiple_operations() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_multiple'
        reset: true
    )!

    // Store multiple values
    mut values := [][]u8{}
    mut hashes := []string{}
    for i in 0 .. 5 {
        value := 'test data ${i}'.bytes()
        values << value
        hash := ds.store(value)!
        hashes << hash
    }

    // Verify all values can be retrieved
    for i, hash in hashes {
        retrieved := ds.get(hash)!
        assert retrieved == values[i]
    }

    // Test deduplication by storing same values again
    for i, value in values {
        hash := ds.store(value)!
        assert hash == hashes[i] // Should get same hash for same data
    }
}