db/ourdb/src/backend.rs
2025-04-21 03:26:38 +02:00

347 lines
11 KiB
Rust

use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
use crc32fast::Hasher;
use crate::error::Error;
use crate::location::Location;
use crate::OurDB;
// Header size: 2 bytes (size) + 4 bytes (CRC32) + 6 bytes (previous location)
pub const HEADER_SIZE: usize = 12;
impl OurDB {
/// Selects and opens a database file for read/write operations
pub(crate) fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error> {
// No need to check if file_nr > 65535 as u16 can't exceed that value
let path = self.path.join(format!("{}.db", file_nr));
// Always close the current file if it's open
self.file = None;
// Create file if it doesn't exist
if !path.exists() {
self.create_new_db_file(file_nr)?;
}
// Open the file fresh
let file = OpenOptions::new()
.read(true)
.write(true)
.open(&path)?;
self.file = Some(file);
self.file_nr = file_nr;
Ok(())
}
/// Creates a new database file
pub(crate) fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error> {
let new_file_path = self.path.join(format!("{}.db", file_nr));
let mut file = File::create(&new_file_path)?;
// Write a single byte to make all positions start from 1
file.write_all(&[0u8])?;
Ok(())
}
/// Gets the file number to use for the next write operation
pub(crate) fn get_file_nr(&mut self) -> Result<u16, Error> {
// For keysize 2, 3, or 4, we can only use file_nr 0
if self.lookup.keysize() <= 4 {
let path = self.path.join("0.db");
if !path.exists() {
self.create_new_db_file(0)?;
}
return Ok(0);
}
// For keysize 6, we can use multiple files
let path = self.path.join(format!("{}.db", self.last_used_file_nr));
if !path.exists() {
self.create_new_db_file(self.last_used_file_nr)?;
return Ok(self.last_used_file_nr);
}
let metadata = fs::metadata(&path)?;
if metadata.len() >= self.file_size as u64 {
self.last_used_file_nr += 1;
self.create_new_db_file(self.last_used_file_nr)?;
}
Ok(self.last_used_file_nr)
}
/// Stores data at the specified ID with history tracking
pub(crate) fn set_(&mut self, id: u32, old_location: Location, data: &[u8]) -> Result<(), Error> {
// Validate data size - maximum is u16::MAX (65535 bytes or ~64KB)
if data.len() > u16::MAX as usize {
return Err(Error::InvalidOperation(
format!("Data size exceeds maximum allowed size of {} bytes", u16::MAX)
));
}
// Get file number to use
let file_nr = self.get_file_nr()?;
// Select the file
self.db_file_select(file_nr)?;
// Get current file position for lookup
let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;
file.seek(SeekFrom::End(0))?;
let position = file.stream_position()? as u32;
// Create new location
let new_location = Location {
file_nr,
position,
};
// Calculate CRC of data
let crc = calculate_crc(data);
// Create header
let mut header = vec![0u8; HEADER_SIZE];
// Write size (2 bytes)
let size = data.len() as u16; // Safe now because we've validated the size
header[0] = (size & 0xFF) as u8;
header[1] = ((size >> 8) & 0xFF) as u8;
// Write CRC (4 bytes)
header[2] = (crc & 0xFF) as u8;
header[3] = ((crc >> 8) & 0xFF) as u8;
header[4] = ((crc >> 16) & 0xFF) as u8;
header[5] = ((crc >> 24) & 0xFF) as u8;
// Write previous location (6 bytes)
let prev_bytes = old_location.to_bytes();
for (i, &byte) in prev_bytes.iter().enumerate().take(6) {
header[6 + i] = byte;
}
// Write header
file.write_all(&header)?;
// Write actual data
file.write_all(data)?;
file.flush()?;
// Update lookup table with new position
self.lookup.set(id, new_location)?;
Ok(())
}
/// Retrieves data at the specified location
pub(crate) fn get_(&mut self, location: Location) -> Result<Vec<u8>, Error> {
if location.position == 0 {
return Err(Error::NotFound(format!("Record not found, location: {:?}", location)));
}
// Select the file
self.db_file_select(location.file_nr)?;
let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;
// Read header
file.seek(SeekFrom::Start(location.position as u64))?;
let mut header = vec![0u8; HEADER_SIZE];
file.read_exact(&mut header)?;
// Parse size (2 bytes)
let size = u16::from(header[0]) | (u16::from(header[1]) << 8);
// Parse CRC (4 bytes)
let stored_crc = u32::from(header[2])
| (u32::from(header[3]) << 8)
| (u32::from(header[4]) << 16)
| (u32::from(header[5]) << 24);
// Read data
let mut data = vec![0u8; size as usize];
file.read_exact(&mut data)?;
// Verify CRC
let calculated_crc = calculate_crc(&data);
if calculated_crc != stored_crc {
return Err(Error::DataCorruption("CRC mismatch: data corruption detected".to_string()));
}
Ok(data)
}
/// Retrieves the previous position for a record (for history tracking)
pub(crate) fn get_prev_pos_(&mut self, location: Location) -> Result<Location, Error> {
if location.position == 0 {
return Err(Error::NotFound("Record not found".to_string()));
}
// Select the file
self.db_file_select(location.file_nr)?;
let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;
// Skip size and CRC (6 bytes)
file.seek(SeekFrom::Start(location.position as u64 + 6))?;
// Read previous location (6 bytes)
let mut prev_bytes = vec![0u8; 6];
file.read_exact(&mut prev_bytes)?;
// Create location from bytes
Location::from_bytes(&prev_bytes, 6)
}
/// Deletes the record at the specified location
pub(crate) fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error> {
if location.position == 0 {
return Err(Error::NotFound("Record not found".to_string()));
}
// Select the file
self.db_file_select(location.file_nr)?;
let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;
// Read size first
file.seek(SeekFrom::Start(location.position as u64))?;
let mut size_bytes = vec![0u8; 2];
file.read_exact(&mut size_bytes)?;
let size = u16::from(size_bytes[0]) | (u16::from(size_bytes[1]) << 8);
// Write zeros for the entire record (header + data)
let zeros = vec![0u8; HEADER_SIZE + size as usize];
file.seek(SeekFrom::Start(location.position as u64))?;
file.write_all(&zeros)?;
// Clear lookup entry
self.lookup.delete(id)?;
Ok(())
}
/// Condenses the database by removing empty records and updating positions
pub fn condense(&mut self) -> Result<(), Error> {
// Create a temporary directory
let temp_path = self.path.join("temp");
fs::create_dir_all(&temp_path)?;
// Get all file numbers
let mut file_numbers = Vec::new();
for entry in fs::read_dir(&self.path)? {
let entry = entry?;
let path = entry.path();
if path.is_file() && path.extension().map_or(false, |ext| ext == "db") {
if let Some(stem) = path.file_stem() {
if let Ok(file_nr) = stem.to_string_lossy().parse::<u16>() {
file_numbers.push(file_nr);
}
}
}
}
// Process each file
for file_nr in file_numbers {
let src_path = self.path.join(format!("{}.db", file_nr));
let temp_file_path = temp_path.join(format!("{}.db", file_nr));
// Create new file
let mut temp_file = File::create(&temp_file_path)?;
temp_file.write_all(&[0u8])?; // Initialize with a byte
// Open source file
let mut src_file = File::open(&src_path)?;
// Read and process records
let mut buffer = vec![0u8; 1024]; // Read in chunks
let mut _position = 0;
while let Ok(bytes_read) = src_file.read(&mut buffer) {
if bytes_read == 0 {
break;
}
// Process the chunk
// This is a simplified version - in a real implementation,
// you would need to handle records that span chunk boundaries
_position += bytes_read;
}
// TODO: Implement proper record copying and position updating
// This would involve:
// 1. Reading each record from the source file
// 2. If not deleted (all zeros), copy to temp file
// 3. Update lookup table with new positions
}
// TODO: Replace original files with temp files
// Clean up
fs::remove_dir_all(&temp_path)?;
Ok(())
}
}
/// Calculates CRC32 for the data
fn calculate_crc(data: &[u8]) -> u32 {
let mut hasher = Hasher::new();
hasher.update(data);
hasher.finalize()
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use crate::{OurDB, OurDBConfig, OurDBSetArgs};
use std::env::temp_dir;
use std::time::{SystemTime, UNIX_EPOCH};
fn get_temp_dir() -> PathBuf {
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
temp_dir().join(format!("ourdb_backend_test_{}", timestamp))
}
#[test]
fn test_backend_operations() {
let temp_dir = get_temp_dir();
let config = OurDBConfig {
path: temp_dir.clone(),
incremental_mode: false,
file_size: None,
keysize: None,
reset: None, // Don't reset existing database
};
let mut db = OurDB::new(config).unwrap();
// Test set and get
let test_data = b"Test data for backend operations";
let id = 1;
db.set(OurDBSetArgs { id: Some(id), data: test_data }).unwrap();
let retrieved = db.get(id).unwrap();
assert_eq!(retrieved, test_data);
// Clean up
db.destroy().unwrap();
}
}