2025-02-24 06:34:38 -07:00
parent 6a2e143b98
commit fff14183a4
15 changed files with 341 additions and 12 deletions


@@ -23,14 +23,3 @@ model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/c
// report := model.new_report(
// name: 'example_report'
// title: 'Example Business Model'
// )!
// report.export(
// path: build_path
// overwrite: true
// format: .docusaurus
// )!


@@ -0,0 +1,37 @@
#!/usr/bin/env -S v -n -w -cg -gc none -no-retry-compilation -cc tcc -d use_openssl -enable-globals run
//#!/usr/bin/env -S v -cg -enable-globals run
import freeflowuniverse.herolib.biz.bizmodel
import freeflowuniverse.herolib.core.playbook
import freeflowuniverse.herolib.core.playcmds
import os
//TODO: need to fix wrong location
const playbook_path = os.dir(@FILE) + '/playbook'
const build_path = os.join_path(os.dir(@FILE), '/docusaurus')
buildpath := '${os.home_dir()}/hero/var/mdbuild/bizmodel'
mut model := bizmodel.getset("example")!
model.workdir = build_path
model.play(mut playbook.new(path: playbook_path)!)!
println(model.sheet)
println(model.sheet.export()!)
// model.sheet.export(path:"~/Downloads/test.csv")!
// model.sheet.export(path:"~/code/github/freeflowuniverse/starlight_template/src/content/test.csv")!
report := model.new_report(
    name: 'example_report'
    title: 'Example Business Model'
)!
report.export(
    path: build_path
    overwrite: true
    format: .docusaurus
)!


@@ -6,6 +6,8 @@ This company is a cloud company ...
- name, e.g. for a specific project
- descr, description of the revenue line item
- revenue_items: one-off revenue, is not extrapolated
- revenue_growth: a revenue stream which is extrapolated over time
- revenue_setup, setup revenue for 1 item, e.g. '1000usd'
- revenue_setup_delay
- revenue_monthly, revenue per month for 1 item
@@ -25,7 +27,7 @@ This company is a cloud company ...
```js
!!bizmodel.revenue_define bizname:'test'
descr:'OEM Deals'
revenue_setup:'10:1000000EUR,15:3333,20:1200000'
revenue_items:'10:1000000EUR,15:3333,20:1200000'
cogs_setup_perc: '1:5%,20:10%'
!!bizmodel.revenue_define bizname:'test'


@@ -0,0 +1,94 @@
# DedupeStore
DedupeStore is a content-addressable key-value store with built-in deduplication. It uses blake2b-160 content hashing to identify and deduplicate data, making it ideal for storing files or data blocks where the same content might appear multiple times.
## Features
- Content-based deduplication using blake2b-160 hashing
- Efficient storage using RadixTree for hash lookups
- Persistent storage using OurDB
- Maximum value size limit of 1MB
- Fast retrieval of data using content hash
- Automatic deduplication of identical content
## Usage
```v
import freeflowuniverse.herolib.data.dedupestor
fn main() ! {
    // Create a new dedupestore
    mut ds := dedupestor.new(
        path: 'path/to/store'
        reset: false // Set to true to reset existing data
    )!

    // Store some data
    data := 'Hello, World!'.bytes()
    hash := ds.store(data)!
    println('Stored data with hash: ${hash}')

    // Retrieve data using hash
    retrieved := ds.get(hash)!
    println('Retrieved data: ${retrieved.bytestr()}')

    // Check if data exists
    exists := ds.exists(hash)
    println('Data exists: ${exists}')

    // Attempting to store the same data again returns the same hash
    same_hash := ds.store(data)!
    assert hash == same_hash // True, data was deduplicated
}
```
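Because the key is derived from the content itself, the returned hash is the only handle you need to keep: it can be used to fetch the same data later against the same store path. A minimal sketch of that pattern (the helper function names are illustrative, only the `dedupestor` calls come from the API above):

```v
import freeflowuniverse.herolib.data.dedupestor

// store a payload once and hand back its content hash
fn store_once(path string, content string) !string {
    mut ds := dedupestor.new(path: path)!
    return ds.store(content.bytes())!
}

// later, possibly from another process: reopen the same path and fetch by hash
fn fetch_later(path string, hash string) !string {
    mut ds := dedupestor.new(path: path)!
    return ds.get(hash)!.bytestr()
}
```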
## Implementation Details
DedupeStore uses two main components for storage:
1. **RadixTree**: Stores mappings from content hashes to data location IDs
2. **OurDB**: Stores the actual data blocks
When storing data:
1. The data is hashed using blake2b-160
2. If the hash exists in the RadixTree, the existing data location is returned
3. If the hash is new:
- Data is stored in OurDB, getting a new location ID
- Hash -> ID mapping is stored in RadixTree
- The hash is returned
When retrieving data:
1. The RadixTree is queried with the hash to get the data location ID
2. The data is retrieved from OurDB using the ID
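For illustration, the retrieval flow above can be written out directly against the two components. This is a sketch, not the module's source: the `search`/`get` calls mirror how the module uses them, but the standalone function and its parameter types are assumptions.

```v
import freeflowuniverse.herolib.data.radixtree
import freeflowuniverse.herolib.data.ourdb

// sketch: resolve a content hash to its stored bytes
fn get_by_hash(mut radix radixtree.RadixTree, mut db ourdb.OurDB, hash string) ![]u8 {
    // 1. the RadixTree maps the hash to a 4-byte location id
    id_bytes := radix.search(hash)!
    // 2. decode the little-endian u32 id
    id := u32(id_bytes[0]) | (u32(id_bytes[1]) << 8) | (u32(id_bytes[2]) << 16) | (u32(id_bytes[3]) << 24)
    // 3. OurDB returns the data block stored under that id
    return db.get(id)!
}
```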
## Size Limits
- Maximum value size: 1MB
- Attempting to store larger values will result in an error
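Callers can also check payloads against the public `max_value_size` constant up front instead of relying on the error; a small sketch:

```v
import freeflowuniverse.herolib.data.dedupestor

// reject oversized payloads before they ever reach store()
fn fits_in_store(data []u8) bool {
    return data.len <= dedupestor.max_value_size
}
```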
## Error Handling
The store methods return results that should be handled with V's error handling:
```v
// Handle potential errors
if hash := ds.store(large_data) {
    // Success
    println('Stored with hash: ${hash}')
} else {
    // Error occurred
    println('Error: ${err}')
}
```
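The same call can also be handled with an `or` block, which is the other common pattern in V code; a brief sketch (the error message and exit path are just examples):

```v
hash := ds.store(large_data) or {
    eprintln('store failed: ${err}')
    exit(1)
}
println('Stored with hash: ${hash}')
```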
## Testing
The module includes comprehensive tests covering:
- Basic store/retrieve operations
- Deduplication functionality
- Size limit enforcement
- Edge cases
Run tests with:
```bash
v test lib/data/dedupestor/
```


@@ -0,0 +1,99 @@
module dedupestor
import crypto.blake2b
import freeflowuniverse.herolib.data.radixtree
import freeflowuniverse.herolib.data.ourdb
pub const max_value_size = 1024 * 1024 // 1MB
// DedupeStore provides a key-value store with deduplication based on content hashing
pub struct DedupeStore {
mut:
    radix &radixtree.RadixTree // For storing hash -> id mappings
    data  &ourdb.OurDB         // For storing the actual data
}

@[params]
pub struct NewArgs {
pub mut:
    path  string // Base path for the store
    reset bool   // Whether to reset existing data
}
// new creates a new deduplication store
pub fn new(args NewArgs) !&DedupeStore {
    // Create the radixtree for hash -> id mapping
    mut rt := radixtree.new(
        path: '${args.path}/radixtree'
        reset: args.reset
    )!

    // Create the ourdb for actual data storage
    mut db := ourdb.new(
        path: '${args.path}/data'
        record_size_max: max_value_size
        incremental_mode: true // We want auto-incrementing IDs
        reset: args.reset
    )!

    return &DedupeStore{
        radix: rt
        data: db
    }
}
// store stores a value and returns its hash
// If the value already exists (same hash), returns the existing hash without storing again
pub fn (mut ds DedupeStore) store(value []u8) !string {
    // Check size limit
    if value.len > max_value_size {
        return error('value size exceeds maximum allowed size of 1MB')
    }

    // Calculate the blake2b-160 hash of the value
    hash := blake2b.sum160(value).hex()

    // Check if this hash already exists
    if _ := ds.radix.search(hash) {
        // Value already exists, return the hash
        return hash
    }

    // Store the actual data in ourdb
    id := ds.data.set(data: value)!

    // Convert id to bytes for storage in radixtree
    id_bytes := u32_to_bytes(id)

    // Store the mapping of hash -> id in radixtree
    ds.radix.insert(hash, id_bytes)!

    return hash
}
// get retrieves a value by its hash
pub fn (mut ds DedupeStore) get(hash string) ![]u8 {
    // Get the ID from radixtree
    id_bytes := ds.radix.search(hash)!

    // Convert bytes back to u32 id
    id := bytes_to_u32(id_bytes)

    // Get the actual data from ourdb
    return ds.data.get(id)!
}
// exists checks if a value with the given hash exists
pub fn (mut ds DedupeStore) exists(hash string) bool {
    return if _ := ds.radix.search(hash) { true } else { false }
}
// Helper function to convert u32 to []u8
fn u32_to_bytes(n u32) []u8 {
    return [u8(n), u8(n >> 8), u8(n >> 16), u8(n >> 24)]
}
// Helper function to convert []u8 to u32
fn bytes_to_u32(b []u8) u32 {
    return u32(b[0]) | (u32(b[1]) << 8) | (u32(b[2]) << 16) | (u32(b[3]) << 24)
}
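The two byte helpers are little-endian inverses of each other; a quick test-style round-trip check (illustrative only, not part of the module, but it could live in a test file alongside it):

```v
fn test_u32_bytes_roundtrip() {
    id := u32(0x12345678)
    assert u32_to_bytes(id) == [u8(0x78), 0x56, 0x34, 0x12]
    assert bytes_to_u32(u32_to_bytes(id)) == id
}
```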


@@ -0,0 +1,108 @@
module dedupestor
import os
fn testsuite_begin() ! {
    // Ensure test directories exist and are clean
    test_dirs := [
        '/tmp/dedupestor_test',
        '/tmp/dedupestor_test_size',
        '/tmp/dedupestor_test_exists',
        '/tmp/dedupestor_test_multiple',
    ]
    for dir in test_dirs {
        if os.exists(dir) {
            os.rmdir_all(dir) or {}
        }
        os.mkdir_all(dir) or {}
    }
}
fn test_basic_operations() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test'
        reset: true
    )!

    // Test storing and retrieving data
    value1 := 'test data 1'.bytes()
    hash1 := ds.store(value1)!
    retrieved1 := ds.get(hash1)!
    assert retrieved1 == value1

    // Test deduplication
    hash2 := ds.store(value1)!
    assert hash1 == hash2 // Should return same hash for same data

    // Test different data gets different hash
    value2 := 'test data 2'.bytes()
    hash3 := ds.store(value2)!
    assert hash1 != hash3 // Should be different hash for different data
    retrieved2 := ds.get(hash3)!
    assert retrieved2 == value2
}
fn test_size_limit() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_size'
        reset: true
    )!

    // Test data under size limit (1KB)
    small_data := []u8{len: 1024, init: u8(index)}
    small_hash := ds.store(small_data)!
    retrieved := ds.get(small_hash)!
    assert retrieved == small_data

    // Test data over size limit (2MB)
    large_data := []u8{len: 2 * 1024 * 1024, init: u8(index)}
    if _ := ds.store(large_data) {
        assert false, 'Expected error for data exceeding size limit'
    }
}
fn test_exists() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_exists'
        reset: true
    )!

    value := 'test data'.bytes()
    hash := ds.store(value)!

    assert ds.exists(hash) == true
    assert ds.exists('nonexistent') == false
}
fn test_multiple_operations() ! {
    mut ds := new(
        path: '/tmp/dedupestor_test_multiple'
        reset: true
    )!

    // Store multiple values
    mut values := [][]u8{}
    mut hashes := []string{}
    for i in 0 .. 5 {
        value := 'test data ${i}'.bytes()
        values << value
        hash := ds.store(value)!
        hashes << hash
    }

    // Verify all values can be retrieved
    for i, hash in hashes {
        retrieved := ds.get(hash)!
        assert retrieved == values[i]
    }

    // Test deduplication by storing same values again
    for i, value in values {
        hash := ds.store(value)!
        assert hash == hashes[i] // Should get same hash for same data
    }
}