From fff14183a4e5df6892143c9c08c20b38f4170b41 Mon Sep 17 00:00:00 2001
From: despiegk
Date: Mon, 24 Feb 2025 06:34:38 -0700
Subject: [PATCH] ....

---
 .../{bizmodel_docusaurus => }/bizmodel.vsh    |  11 --
 .../bizmodel_docusaurus.vsh                   |  37 ++++++
 .../playbook/.collection                      |   0
 .../playbook/bizmodel.md                      |   0
 .../playbook/cost_centers.md                  |   0
 .../playbook/costs_params.md                  |   0
 .../playbook/debug.md                         |   0
 .../playbook/department_params.md             |   0
 .../playbook/funding_params.md                |   0
 .../playbook/hr_params.md                     |   0
 .../playbook/params.md                        |   0
 .../playbook/revenue_params.md                |   4 +-
 lib/data/dedupestor/README.md                 |  94 +++++++++++++++
 lib/data/dedupestor/dedupestor.v              |  99 ++++++++++++++++
 lib/data/dedupestor/dedupestor_test.v         | 108 ++++++++++++++++++
 15 files changed, 341 insertions(+), 12 deletions(-)
 rename examples/biztools/{bizmodel_docusaurus => }/bizmodel.vsh (80%)
 create mode 100755 examples/biztools/bizmodel_docusaurus/bizmodel_docusaurus.vsh
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/.collection (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/bizmodel.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/cost_centers.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/costs_params.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/debug.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/department_params.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/funding_params.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/hr_params.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/params.md (100%)
 rename examples/biztools/{bizmodel_docusaurus => }/playbook/revenue_params.md (94%)
 create mode 100644 lib/data/dedupestor/README.md
 create mode 100644 lib/data/dedupestor/dedupestor.v
 create mode 100644 lib/data/dedupestor/dedupestor_test.v

diff --git a/examples/biztools/bizmodel_docusaurus/bizmodel.vsh b/examples/biztools/bizmodel.vsh
similarity index 80%
rename from examples/biztools/bizmodel_docusaurus/bizmodel.vsh
rename to examples/biztools/bizmodel.vsh
index deb19abc..cec9faf8 100755
--- a/examples/biztools/bizmodel_docusaurus/bizmodel.vsh
+++ b/examples/biztools/bizmodel.vsh
@@ -23,14 +23,3 @@ model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/c
-
-// report := model.new_report(
-// name: 'example_report'
-// )!
-
-// report.export(
-// path: build_path
-// overwrite: true
-// format: .docusaurus
-// )!
diff --git a/examples/biztools/bizmodel_docusaurus/bizmodel_docusaurus.vsh b/examples/biztools/bizmodel_docusaurus/bizmodel_docusaurus.vsh
new file mode 100755
index 00000000..e72410b1
--- /dev/null
+++ b/examples/biztools/bizmodel_docusaurus/bizmodel_docusaurus.vsh
@@ -0,0 +1,37 @@
+#!/usr/bin/env -S v -n -w -cg -gc none -no-retry-compilation -cc tcc -d use_openssl -enable-globals run
+
+//#!/usr/bin/env -S v -cg -enable-globals run
+import freeflowuniverse.herolib.biz.bizmodel
+import freeflowuniverse.herolib.core.playbook
+import freeflowuniverse.herolib.core.playcmds
+import os
+
+//TODO: need to fix wrong location
+const playbook_path = os.dir(@FILE) + '/playbook'
+const build_path = os.join_path(os.dir(@FILE), '/docusaurus')
+
+buildpath := '${os.home_dir()}/hero/var/mdbuild/bizmodel'
+
+mut model := bizmodel.getset("example")!
+model.workdir = build_path
+model.play(mut playbook.new(path: playbook_path)!)!
+
+println(model.sheet)
+println(model.sheet.export()!)
+
+// model.sheet.export(path:"~/Downloads/test.csv")!
+// model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/content/test.csv")!
+
+
+
+
+report := model.new_report(
+	name: 'example_report'
+	title: 'Example Business Model'
+)!
+
+report.export(
+	path: build_path
+	overwrite: true
+	format: .docusaurus
+)!
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/.collection b/examples/biztools/playbook/.collection
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/.collection
rename to examples/biztools/playbook/.collection
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/bizmodel.md b/examples/biztools/playbook/bizmodel.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/bizmodel.md
rename to examples/biztools/playbook/bizmodel.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/cost_centers.md b/examples/biztools/playbook/cost_centers.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/cost_centers.md
rename to examples/biztools/playbook/cost_centers.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/costs_params.md b/examples/biztools/playbook/costs_params.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/costs_params.md
rename to examples/biztools/playbook/costs_params.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/debug.md b/examples/biztools/playbook/debug.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/debug.md
rename to examples/biztools/playbook/debug.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/department_params.md b/examples/biztools/playbook/department_params.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/department_params.md
rename to examples/biztools/playbook/department_params.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/funding_params.md b/examples/biztools/playbook/funding_params.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/funding_params.md
rename to examples/biztools/playbook/funding_params.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/hr_params.md b/examples/biztools/playbook/hr_params.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/hr_params.md
rename to examples/biztools/playbook/hr_params.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/params.md b/examples/biztools/playbook/params.md
similarity index 100%
rename from examples/biztools/bizmodel_docusaurus/playbook/params.md
rename to examples/biztools/playbook/params.md
diff --git a/examples/biztools/bizmodel_docusaurus/playbook/revenue_params.md b/examples/biztools/playbook/revenue_params.md
similarity index 94%
rename from examples/biztools/bizmodel_docusaurus/playbook/revenue_params.md
rename to examples/biztools/playbook/revenue_params.md
index 361eb016..689ebd2e 100644
--- a/examples/biztools/bizmodel_docusaurus/playbook/revenue_params.md
+++ b/examples/biztools/playbook/revenue_params.md
@@ -6,6 +6,8 @@ This company is a cloud company ...
 
 - name, e.g. for a specific project
 - descr, description of the revenue line item
+- revenue_items: one-off revenue, is not extrapolated
+- revenue_growth: a revenue stream which is extrapolated
 - revenue_setup, revenue for 1 item '1000usd'
 - revenue_setup_delay
 - revenue_monthly, revenue per month for 1 item
@@ -25,7 +27,7 @@ This company is a cloud company ...
 ```js
 !!bizmodel.revenue_define bizname:'test'
     descr:'OEM Deals'
-    revenue_setup:'10:1000000EUR,15:3333,20:1200000'
+    revenue_items:'10:1000000EUR,15:3333,20:1200000'
     cogs_setup_perc: '1:5%,20:10%'
 
 !!bizmodel.revenue_define bizname:'test'
diff --git a/lib/data/dedupestor/README.md b/lib/data/dedupestor/README.md
new file mode 100644
index 00000000..e4f8f764
--- /dev/null
+++ b/lib/data/dedupestor/README.md
@@ -0,0 +1,94 @@
+# DedupeStore
+
+DedupeStore is a content-addressable key-value store with built-in deduplication. It uses blake2b-160 content hashing to identify and deduplicate data, making it ideal for storing files or data blocks where the same content might appear multiple times.
+
+## Features
+
+- Content-based deduplication using blake2b-160 hashing
+- Efficient storage using RadixTree for hash lookups
+- Persistent storage using OurDB
+- Maximum value size limit of 1MB
+- Fast retrieval of data using content hash
+- Automatic deduplication of identical content
+
+## Usage
+
+```v
+import freeflowuniverse.herolib.data.dedupestor
+
+fn main() ! {
+	// Create a new dedupestore
+	mut ds := dedupestor.new(
+		path: 'path/to/store'
+		reset: false // Set to true to reset existing data
+	)!
+
+	// Store some data
+	data := 'Hello, World!'.bytes()
+	hash := ds.store(data)!
+	println('Stored data with hash: ${hash}')
+
+	// Retrieve data using hash
+	retrieved := ds.get(hash)!
+	println('Retrieved data: ${retrieved.bytestr()}')
+
+	// Check if data exists
+	exists := ds.exists(hash)
+	println('Data exists: ${exists}')
+
+	// Attempting to store the same data again returns the same hash
+	same_hash := ds.store(data)!
+	assert hash == same_hash // True, data was deduplicated
+}
+```
+
+## Implementation Details
+
+DedupeStore uses two main components for storage:
+
+1. **RadixTree**: Stores mappings from content hashes to data location IDs
+2. **OurDB**: Stores the actual data blocks
+
+When storing data:
+1. The data is hashed using blake2b-160
+2. If the hash exists in the RadixTree, the existing data location is returned
+3. If the hash is new:
+   - Data is stored in OurDB, getting a new location ID
+   - Hash -> ID mapping is stored in RadixTree
+   - The hash is returned
+
+When retrieving data:
+1. The RadixTree is queried with the hash to get the data location ID
+2. The data is retrieved from OurDB using the ID
+
+## Size Limits
+
+- Maximum value size: 1MB
+- Attempting to store larger values will result in an error
+
+## Error Handling
+
+The store methods return results that should be handled with V's error handling:
+
+```v
+// Handle potential errors
+if hash := ds.store(large_data) {
+	// Success
+	println('Stored with hash: ${hash}')
+} else {
+	// Error occurred
+	println('Error: ${err}')
+}
+```
+
+## Testing
+
+The module includes comprehensive tests covering:
+- Basic store/retrieve operations
+- Deduplication functionality
+- Size limit enforcement
+- Edge cases
+
+Run tests with:
+```bash
+v test lib/data/dedupestor/
diff --git a/lib/data/dedupestor/dedupestor.v b/lib/data/dedupestor/dedupestor.v
new file mode 100644
index 00000000..85fce577
--- /dev/null
+++ b/lib/data/dedupestor/dedupestor.v
@@ -0,0 +1,99 @@
+module dedupestor
+
+import crypto.blake2b
+import freeflowuniverse.herolib.data.radixtree
+import freeflowuniverse.herolib.data.ourdb
+
+pub const max_value_size = 1024 * 1024 // 1MB
+
+// DedupeStore provides a key-value store with deduplication based on content hashing
+pub struct DedupeStore {
+mut:
+	radix &radixtree.RadixTree // For storing hash -> id mappings
+	data &ourdb.OurDB // For storing the actual data
+}
+
+@[params]
+pub struct NewArgs {
+pub mut:
+	path string // Base path for the store
+	reset bool // Whether to reset existing data
+}
+
+// new creates a new deduplication store
+pub fn new(args NewArgs) !&DedupeStore {
+	// Create the radixtree for hash -> id mapping
+	mut rt := radixtree.new(
+		path: '${args.path}/radixtree'
+		reset: args.reset
+	)!
+
+	// Create the ourdb for actual data storage
+	mut db := ourdb.new(
+		path: '${args.path}/data'
+		record_size_max: max_value_size
+		incremental_mode: true // We want auto-incrementing IDs
+		reset: args.reset
+	)!
+
+	return &DedupeStore{
+		radix: rt
+		data: db
+	}
+}
+
+// store stores a value and returns its hash
+// If the value already exists (same hash), returns the existing hash without storing again
+pub fn (mut ds DedupeStore) store(value []u8) !string {
+	// Check size limit
+	if value.len > max_value_size {
+		return error('value size exceeds maximum allowed size of 1MB')
+	}
+
+	// Calculate blake160 hash of the value
+	hash := blake2b.sum160(value).hex()
+
+	// Check if this hash already exists
+	if _ := ds.radix.search(hash) {
+		// Value already exists, return the hash
+		return hash
+	}
+
+	// Store the actual data in ourdb
+	id := ds.data.set(data: value)!
+
+	// Convert id to bytes for storage in radixtree
+	id_bytes := u32_to_bytes(id)
+
+	// Store the mapping of hash -> id in radixtree
+	ds.radix.insert(hash, id_bytes)!
+
+	return hash
+}
+
+// get retrieves a value by its hash
+pub fn (mut ds DedupeStore) get(hash string) ![]u8 {
+	// Get the ID from radixtree
+	id_bytes := ds.radix.search(hash)!
+
+	// Convert bytes back to u32 id
+	id := bytes_to_u32(id_bytes)
+
+	// Get the actual data from ourdb
+	return ds.data.get(id)!
+}
+
+// exists checks if a value with the given hash exists
+pub fn (mut ds DedupeStore) exists(hash string) bool {
+	return if _ := ds.radix.search(hash) { true } else { false }
+}
+
+// Helper function to convert u32 to []u8
+fn u32_to_bytes(n u32) []u8 {
+	return [u8(n), u8(n >> 8), u8(n >> 16), u8(n >> 24)]
+}
+
+// Helper function to convert []u8 to u32
+fn bytes_to_u32(b []u8) u32 {
+	return u32(b[0]) | (u32(b[1]) << 8) | (u32(b[2]) << 16) | (u32(b[3]) << 24)
+}
diff --git a/lib/data/dedupestor/dedupestor_test.v b/lib/data/dedupestor/dedupestor_test.v
new file mode 100644
index 00000000..f10c97d0
--- /dev/null
+++ b/lib/data/dedupestor/dedupestor_test.v
@@ -0,0 +1,108 @@
+module dedupestor
+
+import os
+
+fn testsuite_begin() ! {
+	// Ensure test directories exist and are clean
+	test_dirs := [
+		'/tmp/dedupestor_test',
+		'/tmp/dedupestor_test_size',
+		'/tmp/dedupestor_test_exists',
+		'/tmp/dedupestor_test_multiple'
+	]
+
+	for dir in test_dirs {
+		if os.exists(dir) {
+			os.rmdir_all(dir) or {}
+		}
+		os.mkdir_all(dir) or {}
+	}
+}
+
+fn test_basic_operations() ! {
+	mut ds := new(
+		path: '/tmp/dedupestor_test'
+		reset: true
+	)!
+
+	// Test storing and retrieving data
+	value1 := 'test data 1'.bytes()
+	hash1 := ds.store(value1)!
+
+	retrieved1 := ds.get(hash1)!
+	assert retrieved1 == value1
+
+	// Test deduplication
+	hash2 := ds.store(value1)!
+	assert hash1 == hash2 // Should return same hash for same data
+
+	// Test different data gets different hash
+	value2 := 'test data 2'.bytes()
+	hash3 := ds.store(value2)!
+	assert hash1 != hash3 // Should be different hash for different data
+
+	retrieved2 := ds.get(hash3)!
+	assert retrieved2 == value2
+}
+
+fn test_size_limit() ! {
+	mut ds := new(
+		path: '/tmp/dedupestor_test_size'
+		reset: true
+	)!
+
+	// Test data under size limit (1KB)
+	small_data := []u8{len: 1024, init: u8(index)}
+	small_hash := ds.store(small_data)!
+	retrieved := ds.get(small_hash)!
+	assert retrieved == small_data
+
+	// Test data over size limit (2MB)
+	large_data := []u8{len: 2 * 1024 * 1024, init: u8(index)}
+	if _ := ds.store(large_data) {
+		assert false, 'Expected error for data exceeding size limit'
+	}
+}
+
+fn test_exists() ! {
+	mut ds := new(
+		path: '/tmp/dedupestor_test_exists'
+		reset: true
+	)!
+
+	value := 'test data'.bytes()
+	hash := ds.store(value)!
+
+	assert ds.exists(hash) == true
+	assert ds.exists('nonexistent') == false
+}
+
+fn test_multiple_operations() ! {
+	mut ds := new(
+		path: '/tmp/dedupestor_test_multiple'
+		reset: true
+	)!
+
+	// Store multiple values
+	mut values := [][]u8{}
+	mut hashes := []string{}
+
+	for i in 0..5 {
+		value := 'test data ${i}'.bytes()
+		values << value
+		hash := ds.store(value)!
+		hashes << hash
+	}
+
+	// Verify all values can be retrieved
+	for i, hash in hashes {
+		retrieved := ds.get(hash)!
+		assert retrieved == values[i]
+	}
+
+	// Test deduplication by storing same values again
+	for i, value in values {
+		hash := ds.store(value)!
+		assert hash == hashes[i] // Should get same hash for same data
+	}
+}
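
Note for reviewers: a minimal sketch for trying the new dedupestor module locally, written in the style of the repo's `.vsh` example scripts and using only the API introduced in this patch (`new`, `store`, `get`, `exists`). The `/tmp/dedupestor_demo` path is an arbitrary scratch location chosen for this example, not something the patch creates.

```v
#!/usr/bin/env -S v -n -w -cg -gc none -no-retry-compilation -cc tcc -d use_openssl -enable-globals run

import freeflowuniverse.herolib.data.dedupestor

// scratch location for the demo; any writable directory works
mut ds := dedupestor.new(path: '/tmp/dedupestor_demo', reset: true)!

// storing identical bytes twice yields the same blake2b-160 hash (deduplicated)
hash1 := ds.store('identical payload'.bytes())!
hash2 := ds.store('identical payload'.bytes())!
assert hash1 == hash2

// a different payload yields a different hash
hash3 := ds.store('other payload'.bytes())!
assert hash3 != hash1

// data round-trips by hash, and existence can be checked without fetching
assert ds.get(hash1)!.bytestr() == 'identical payload'
assert ds.exists(hash1)
assert !ds.exists('nonexistent')

println('dedupestor demo ok: ${hash1}')
```

As described in the new README, this exercises both layers: the blake2b-160 hash is looked up in the RadixTree, and only on a miss is the payload written to OurDB.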