This commit is contained in:
2024-05-09 17:09:00 +03:00
parent 2b1d0dc54c
commit 2eed0b65b7
211 changed files with 154 additions and 144 deletions

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Render every Mermaid (.mmd) diagram in the current directory to a PNG
# of the same base name, using mermaid-cli (mmdc).
set -euo pipefail
shopt -s nullglob   # unmatched glob expands to nothing instead of the literal pattern

# derive_output PATH — print the output PNG name for a .mmd input path.
# e.g. "./diagram.mmd" -> "diagram.png"
derive_output() {
  local base=${1##*/}          # strip directory
  printf '%s.png\n' "${base%.mmd}"   # swap .mmd suffix for .png
}

for name in ./*.mmd; do
  output=$(derive_output "$name")
  echo "$output"
  # NOTE: original had "-b transparant" — an invalid color name; mmdc
  # expects "transparent" for a transparent background.
  mmdc -i "$name" -o "$output" -w 4096 -H 2160 -b transparent
  echo "$name"
done

View File

@@ -0,0 +1,13 @@
graph TD
subgraph Data Origin
file[Large chunk of data = part_1part_2part_3part_4]
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
file -.- |split part_1|parta
file -.- |split part_2|partb
file -.- |split part 3|partc
file -.- |split part 4|partd
parta --> partb --> partc --> partd
end

View File

@@ -0,0 +1,20 @@
graph TD
subgraph Data Substitution
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
parta -.-> vara[ A = part_1]
partb -.-> varb[ B = part_2]
partc -.-> varc[ C = part_3]
partd -.-> vard[ D = part_4]
end
subgraph Create equations with the data parts
eq1[A + B + C + D = 6]
eq2[A + B + C - D = 3]
eq3[A + B - C - D = 10]
eq4[ A - B - C - D = -4]
eq5[ A - B + C + D = 0]
eq6[ A - B - C + D = 5]
vara & varb & varc & vard --> eq1 & eq2 & eq3 & eq4 & eq5 & eq6
end

View File

@@ -0,0 +1,44 @@
graph TD
subgraph Data Origin
file[Large chunk of data = part_1part_2part_3part_4]
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
file -.- |split part_1|parta
file -.- |split part_2|partb
file -.- |split part 3|partc
file -.- |split part 4|partd
parta --> partb --> partc --> partd
parta -.-> vara[ A = part_1]
partb -.-> varb[ B = part_2]
partc -.-> varc[ C = part_3]
partd -.-> vard[ D = part_4]
end
subgraph Create equations with the data parts
eq1[A + B + C + D = 6]
eq2[A + B + C - D = 3]
eq3[A + B - C - D = 10]
eq4[ A - B - C - D = -4]
eq5[ A - B + C + D = 0]
eq6[ A - B - C + D = 5]
vara & varb & varc & vard --> eq1 & eq2 & eq3 & eq4 & eq5 & eq6
end
subgraph Disk 1
eq1 --> |store the unique equation, not the parts|zdb1[A + B + C + D = 6]
end
subgraph Disk 2
eq2 --> |store the unique equation, not the parts|zdb2[A + B + C - D = 3]
end
subgraph Disk 3
eq3 --> |store the unique equation, not the parts|zdb3[A + B - C - D = 10]
end
subgraph Disk 4
eq4 --> |store the unique equation, not the parts|zdb4[A - B - C - D = -4]
end
subgraph Disk 5
eq5 --> |store the unique equation, not the parts|zdb5[ A - B + C + D = 0]
end
subgraph Disk 6
eq6 --> |store the unique equation, not the parts|zdb6[A - B - C + D = 5]
end

View File

@@ -0,0 +1,34 @@
graph TD
subgraph Local laptop, computer or server
user[End User]
protocol[Storage protocol]
qsfs[Filesystem on local OS]
0store[Quantum Safe storage engine]
end
subgraph Grid storage - metadata
etcd1[ETCD-1]
etcd2[ETCD-2]
etcd3[ETCD-3]
end
subgraph Grid storage - zero proof data
zdb1[ZDB-1]
zdb2[ZDB-2]
zdb3[ZDB-3]
zdb4[ZDB-4]
zdb5[ZDB-5]
zdb6[ZDB-6]
zdb7[ZDB-7]
user -.- protocol
protocol -.- qsfs
qsfs --- 0store
0store --- etcd1
0store --- etcd2
0store --- etcd3
0store <-.-> zdb1[ZDB-1]
0store <-.-> zdb2[ZDB-2]
0store <-.-> zdb3[ZDB-3]
0store <-.-> zdb4[ZDB-4]
0store <-.-> zdb5[ZDB-5]
0store <-.-> zdb6[ZDB-...]
0store <-.-> zdb7[ZDB-N]
end

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Render every Mermaid (.mmd) diagram in the current directory to a PNG
# of the same base name, using mermaid-cli (mmdc).
set -euo pipefail
shopt -s nullglob   # unmatched glob expands to nothing instead of the literal pattern

# derive_output PATH — print the output PNG name for a .mmd input path.
# e.g. "./diagram.mmd" -> "diagram.png"
derive_output() {
  local base=${1##*/}          # strip directory
  printf '%s.png\n' "${base%.mmd}"   # swap .mmd suffix for .png
}

for name in ./*.mmd; do
  output=$(derive_output "$name")
  echo "$output"
  # NOTE: original had "-b transparant" — an invalid color name; mmdc
  # expects "transparent" for a transparent background.
  mmdc -i "$name" -o "$output" -w 4096 -H 2160 -b transparent
  echo "$name"
done

Binary file not shown.

After

Width:  |  Height:  |  Size: 285 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 238 KiB

View File

@@ -0,0 +1,39 @@
<!-- ![](img/filesystem_abstract.jpg) -->
![](img/qsss_intro_.jpg)
# Quantum Safe Filesystem
A redundant filesystem that can store PBs (millions of gigabytes) of information.
Unique features:
- Unlimited scalable (many petabytes) filesystem
- Quantum Safe:
- On the TFGrid, no farmer knows what the data is about
- Even a quantum computer cannot decrypt
- Data can't be lost
- Protection for [datarot](datarot), data will autorepair
- Data is kept forever
- Data is dispersed over multiple sites
- Sites can go down, data not lost
- Up to 10x more efficient than storing on classic storage cloud systems
- Can be mounted as filesystem on any OS or any deployment system (OSX, Linux, Windows, Docker, Kubernetes, TFGrid, ...)
- Compatible with +- all data workloads (not high performance data driven workloads like a database)
- Self-healing: when a node or disk is lost, the storage system can restore itself to the original redundancy level
- Helps with compliance to regulations like GDPR (as the hosting facility has no view on what is stored, information is encrypted and incomplete)
- Hybrid: can be installed onsite, public, private, ...
- Read-write caching on encoding node (the front end)
## Architecture
By using our filesystem inside a Virtual Machine or Kubernetes the TFGrid user can deploy any storage application on top e.g. Minio for S3 storage, OwnCloud as online fileserver.
![](img/qsstorage_architecture.jpg)
Any storage workload can be deployed on top of the zstor.
!!!def alias:quantumsafe_filesystem,planetary_fs,planet_fs,quantumsafe_file_system,zstor,qsfs
!!!include:qsss_toc

View File

@@ -0,0 +1,14 @@
graph TD
subgraph Data Ingress and Egress
qss[Quantum Safe Storage Engine]
end
subgraph Physical Data storage
st1[Virtual Storage Device 1]
st2[Virtual Storage Device 2]
st3[Virtual Storage Device 3]
st4[Virtual Storage Device 4]
st5[Virtual Storage Device 5]
st6[Virtual Storage Device 6]
st7[Virtual Storage Device 7]
qss -.-> st1 & st2 & st3 & st4 & st5 & st6 & st7
end

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Render every Mermaid (.mmd) diagram in the current directory to a PNG
# of the same base name, using mermaid-cli (mmdc).
set -euo pipefail
shopt -s nullglob   # unmatched glob expands to nothing instead of the literal pattern

# derive_output PATH — print the output PNG name for a .mmd input path.
# e.g. "./diagram.mmd" -> "diagram.png"
derive_output() {
  local base=${1##*/}          # strip directory
  printf '%s.png\n' "${base%.mmd}"   # swap .mmd suffix for .png
}

for name in ./*.mmd; do
  output=$(derive_output "$name")
  echo "$output"
  # NOTE: original had "-b transparant" — an invalid color name; mmdc
  # expects "transparent" for a transparent background.
  mmdc -i "$name" -o "$output" -w 4096 -H 2160 -b transparent
  echo "$name"
done

View File

@@ -0,0 +1,13 @@
graph TD
subgraph Data Origin
file[Large chunk of data = part_1part_2part_3part_4]
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
file -.- |split part_1|parta
file -.- |split part_2|partb
file -.- |split part 3|partc
file -.- |split part 4|partd
parta --> partb --> partc --> partd
end

View File

@@ -0,0 +1,20 @@
graph TD
subgraph Data Substitution
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
parta -.-> vara[ A = part_1]
partb -.-> varb[ B = part_2]
partc -.-> varc[ C = part_3]
partd -.-> vard[ D = part_4]
end
subgraph Create equations with the data parts
eq1[A + B + C + D = 6]
eq2[A + B + C - D = 3]
eq3[A + B - C - D = 10]
eq4[ A - B - C - D = -4]
eq5[ A - B + C + D = 0]
eq6[ A - B - C + D = 5]
vara & varb & varc & vard --> eq1 & eq2 & eq3 & eq4 & eq5 & eq6
end

View File

@@ -0,0 +1,44 @@
graph TD
subgraph Data Origin
file[Large chunk of data = part_1part_2part_3part_4]
parta[part_1]
partb[part_2]
partc[part_3]
partd[part_4]
file -.- |split part_1|parta
file -.- |split part_2|partb
file -.- |split part 3|partc
file -.- |split part 4|partd
parta --> partb --> partc --> partd
parta -.-> vara[ A = part_1]
partb -.-> varb[ B = part_2]
partc -.-> varc[ C = part_3]
partd -.-> vard[ D = part_4]
end
subgraph Create equations with the data parts
eq1[A + B + C + D = 6]
eq2[A + B + C - D = 3]
eq3[A + B - C - D = 10]
eq4[ A - B - C - D = -4]
eq5[ A - B + C + D = 0]
eq6[ A - B - C + D = 5]
vara & varb & varc & vard --> eq1 & eq2 & eq3 & eq4 & eq5 & eq6
end
subgraph Disk 1
eq1 --> |store the unique equation, not the parts|zdb1[A + B + C + D = 6]
end
subgraph Disk 2
eq2 --> |store the unique equation, not the parts|zdb2[A + B + C - D = 3]
end
subgraph Disk 3
eq3 --> |store the unique equation, not the parts|zdb3[A + B - C - D = 10]
end
subgraph Disk 4
eq4 --> |store the unique equation, not the parts|zdb4[A - B - C - D = -4]
end
subgraph Disk 5
eq5 --> |store the unique equation, not the parts|zdb5[ A - B + C + D = 0]
end
subgraph Disk 6
eq6 --> |store the unique equation, not the parts|zdb6[A - B - C + D = 5]
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

View File

@@ -0,0 +1,82 @@
# Quantum Safe Storage Algorithm
![](img/tf_banner_grid_.jpg)
The Quantum Safe Storage Algorithm is the heart of the Storage engine. The storage engine takes the original data objects and creates data part descriptions that it stores over many virtual storage devices (ZDB/s)
![](../img/.jpg)
Data gets stored over multiple ZDB's in such a way that data can never be lost.
Unique features
- data always append, can never be lost
- even a quantum computer cannot decrypt the data
- is spread over multiple sites, sites can be lost, data will still be available
- protects for [datarot](datarot)
### Why
Today we produce more data than ever before. We could not continue to make full copies of data to make sure it is stored reliably. This will simply not scale. We need to move from securing the whole dataset to securing all the objects that make up a dataset.
ThreeFold is using space technology to store data (fragments) over multiple devices (physical storage devices in 3Nodes). The solution does not distribute and store parts of an object (file, photo, movie...) but describes the part of an object. This could be visualized by thinking of it as equations.
### Details
Let a,b,c,d.... be the parts of that original object. You could create endless unique equations using these parts. A simple example: let's assume we have 3 parts of original objects that have the following values:
```
a=1
b=2
c=3
```
(and for reference that part of real-world objects is not a simple number like `1` but a unique digital number describing the part, like the binary code for it `110101011101011101010111101110111100001010101111011.....`). With these numbers we could create endless amounts of equations:
```
1: a+b+c=6
2: c-b-a=0
3: b-c+a=0
4: 2b+a-c=2
5: 5c-b-a=12
......
```
Mathematically we only need 3 to describe the content (=value) of the fragments. But creating more adds reliability. Now store those equations distributed (one equation per physical storage device) and forget the original object. So we no longer have access to the values of a, b, c and see and we just remember the locations of all the equations created with the original data fragments. Mathematically we need three equations (any 3 of the total) to recover the original values for a, b or c. So do a request to retrieve 3 of the many equations and the first 3 to arrive are good enough to recalculate the original values. Three randomly retrieved equations are:
```
5c-b-a=12
b-c+a=0
2b+a-c=2
```
And this is a mathematical system we could solve:
- First: `b-c+a=0 -> b=c-a`
- Second: `2b+a-c=2 -> c=2b+a-2 -> c=2(c-a)+a-2 -> c=2c-2a+a-2 -> c=a+2`
- Third: `5c-b-a=12 -> 5(a+2)-(c-a)-a=12 -> 5a+10-(a+2)+a-a=12 -> 5a-a-2=2 -> 4a=4 -> a=1`
Now that we know `a=1` we could solve the rest `c=a+2=3` and `b=c-a=2`. And we have from 3 random equations regenerated the original fragments and could now recreate the original object.
The redundancy and reliability in such system comes in the form of creating (more than needed) equations and storing them. As shown these equations in any random order could recreate the original fragments and therefore
redundancy comes in at a much lower overhead.
### Example of 16/4
![](img/quantumsafe_storage_algo.jpg)
Each object is fragmented into 16 parts. So we have 16 original fragments for which we need 16 equations to mathematically describe them. Now let's make 20 equations and store them dispersedly on 20 devices. To recreate the original object we only need 16 equations, the first 16 that we find and collect which allows us to recover the fragment and in the end the original object. We could lose any 4 of those original 20 equations.
The likelihood of losing 4 independent, dispersed storage devices at the same time is very low. Since we have continuous monitoring of all of the stored equations, we could create additional equations immediately when one of them is missing, making it an auto-regeneration of lost data and a self-repairing storage system. The overhead in this example is 4 out of 20 which is a mere **20%** instead of (up to) **400%.**
### Content distribution Policy (10/50)
This system can be used as backend for content delivery networks.
Imagine a movie being stored in 60 locations, of which we can lose 50 at the same time.
If someone now wants to download the data, the first 10 locations that answer fastest will provide enough of the data parts to allow the data to be rebuilt.
The overhead here is much higher compared to the previous example, but still an order of magnitude lower compared to other CDN systems.
!!!def alias:quantumsafe_storage_algo,quantumsafe_storage_algorithm,space_algo,space_algorithm,quantum_safe_storage_algo,qs_algo,qs_codec
!!!include:qsss_toc

View File

@@ -0,0 +1,8 @@
# Datarot Cannot Happen on our Storage System
Datarot is the phenomenon in which stored data degrades over time and becomes unreadable, e.g. on a harddisk.
The storage system provided by ThreeFold intercepts this silent data corruption, ensuring that it cannot pass by unnoticed.
> see also https://en.wikipedia.org/wiki/Data_degradation
!!!def alias:bitrot,datarot

View File

@@ -0,0 +1,11 @@
# Zero Knowledge Proof Storage system.
The quantum safe storage system is zero-knowledge-proof compliant. The storage system is made up of / split into 2 components: the actual storage devices used to store the data (ZDB's) and the Quantum Safe Storage Engine.
![](img/qss_system.jpg)
The zero-knowledge-proof compliance comes from the fact that all the physical storage nodes (3Nodes) can prove that they store a valid part of the data that the quantum safe storage engine (QSSE) has stored on multiple independent devices. The QSSE can validate that all the QSSE storage devices have a valid part of the original information. The storage devices, however, have no idea what the original stored data is, as they only have a part (description) of the original data and have no access to the original data part or the complete original data objects.
!!!def