use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};

use crc32fast::Hasher;

use crate::error::Error;
use crate::location::Location;
use crate::OurDB;

// Header size: 2 bytes (size) + 4 bytes (CRC32) + 6 bytes (previous location)
pub const HEADER_SIZE: usize = 12;
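
// On-disk record layout, as implied by the header constant above and the
// read/write code in `set_` and `get_` below (offsets are relative to the
// record's start position; multi-byte integers are little-endian):
//
//   bytes 0..2    data size (u16)
//   bytes 2..6    CRC32 of the data (u32)
//   bytes 6..12   previous Location of this id (6 bytes), used for history
//   bytes 12..    the data itself (`size` bytes)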

impl OurDB {
    /// Selects and opens a database file for read/write operations
    pub(crate) fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error> {
        // file_nr is a u16, so it can never exceed 65535; no extra range check is needed

        let path = self.path.join(format!("{}.db", file_nr));

        // Always close the current file if it's open
        self.file = None;

        // Create file if it doesn't exist
        if !path.exists() {
            self.create_new_db_file(file_nr)?;
        }

        // Open the file fresh
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(&path)?;

        self.file = Some(file);
        self.file_nr = file_nr;

        Ok(())
    }

    /// Creates a new database file
    pub(crate) fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error> {
        let new_file_path = self.path.join(format!("{}.db", file_nr));
        let mut file = File::create(&new_file_path)?;

        // Write a single placeholder byte so that valid record positions start at 1
        // (a position of 0 is treated as "record not found")
        file.write_all(&[0u8])?;

        Ok(())
    }

    /// Gets the file number to use for the next write operation
    pub(crate) fn get_file_nr(&mut self) -> Result<u16, Error> {
        // For keysize 2, 3, or 4, we can only use file_nr 0
        if self.lookup.keysize() <= 4 {
            let path = self.path.join("0.db");

            if !path.exists() {
                self.create_new_db_file(0)?;
            }

            return Ok(0);
        }

        // For keysize 6, we can use multiple files
        let path = self.path.join(format!("{}.db", self.last_used_file_nr));

        if !path.exists() {
            self.create_new_db_file(self.last_used_file_nr)?;
            return Ok(self.last_used_file_nr);
        }

        // Roll over to a new file once the current one reaches the configured size
        let metadata = fs::metadata(&path)?;
        if metadata.len() >= self.file_size as u64 {
            self.last_used_file_nr += 1;
            self.create_new_db_file(self.last_used_file_nr)?;
        }

        Ok(self.last_used_file_nr)
    }

    /// Stores data at the specified ID with history tracking
    pub(crate) fn set_(&mut self, id: u32, old_location: Location, data: &[u8]) -> Result<(), Error> {
        // Validate data size - maximum is u16::MAX (65535 bytes, ~64 KiB)
        if data.len() > u16::MAX as usize {
            return Err(Error::InvalidOperation(
                format!("Data size exceeds maximum allowed size of {} bytes", u16::MAX)
            ));
        }

        // Get file number to use
        let file_nr = self.get_file_nr()?;

        // Select the file
        self.db_file_select(file_nr)?;

        // New records are appended; the end-of-file offset becomes the record's position
        let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;
        file.seek(SeekFrom::End(0))?;
        let position = file.stream_position()? as u32;

        // Create new location
        let new_location = Location {
            file_nr,
            position,
        };

        // Calculate CRC of data
        let crc = calculate_crc(data);

        // Create header
        let mut header = vec![0u8; HEADER_SIZE];

        // Write size (2 bytes, little-endian)
        let size = data.len() as u16; // Safe now because we've validated the size
        header[0] = (size & 0xFF) as u8;
        header[1] = ((size >> 8) & 0xFF) as u8;

        // Write CRC (4 bytes, little-endian)
        header[2] = (crc & 0xFF) as u8;
        header[3] = ((crc >> 8) & 0xFF) as u8;
        header[4] = ((crc >> 16) & 0xFF) as u8;
        header[5] = ((crc >> 24) & 0xFF) as u8;

        // Write previous location (6 bytes) so the old version stays reachable as history
        let prev_bytes = old_location.to_bytes();
        for (i, &byte) in prev_bytes.iter().enumerate().take(6) {
            header[6 + i] = byte;
        }

        // Write header
        file.write_all(&header)?;

        // Write actual data
        file.write_all(data)?;
        file.flush()?;

        // Update lookup table with new position
        self.lookup.set(id, new_location)?;

        Ok(())
    }

    /// Retrieves data at the specified location
    pub(crate) fn get_(&mut self, location: Location) -> Result<Vec<u8>, Error> {
        if location.position == 0 {
            return Err(Error::NotFound(format!("Record not found, location: {:?}", location)));
        }

        // Select the file
        self.db_file_select(location.file_nr)?;

        let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;

        // Read header
        file.seek(SeekFrom::Start(location.position as u64))?;
        let mut header = vec![0u8; HEADER_SIZE];
        file.read_exact(&mut header)?;

        // Parse size (2 bytes, little-endian)
        let size = u16::from(header[0]) | (u16::from(header[1]) << 8);

        // Parse CRC (4 bytes, little-endian)
        let stored_crc = u32::from(header[2])
            | (u32::from(header[3]) << 8)
            | (u32::from(header[4]) << 16)
            | (u32::from(header[5]) << 24);

        // Read data
        let mut data = vec![0u8; size as usize];
        file.read_exact(&mut data)?;

        // Verify CRC
        let calculated_crc = calculate_crc(&data);
        if calculated_crc != stored_crc {
            return Err(Error::DataCorruption("CRC mismatch: data corruption detected".to_string()));
        }

        Ok(data)
    }

    /// Retrieves the previous position for a record (for history tracking)
    pub(crate) fn get_prev_pos_(&mut self, location: Location) -> Result<Location, Error> {
        if location.position == 0 {
            return Err(Error::NotFound("Record not found".to_string()));
        }

        // Select the file
        self.db_file_select(location.file_nr)?;

        let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;

        // Skip size and CRC (6 bytes)
        file.seek(SeekFrom::Start(location.position as u64 + 6))?;

        // Read previous location (6 bytes)
        let mut prev_bytes = vec![0u8; 6];
        file.read_exact(&mut prev_bytes)?;

        // Create location from bytes
        Location::from_bytes(&prev_bytes, 6)
    }

    /// Deletes the record at the specified location
    pub(crate) fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error> {
        if location.position == 0 {
            return Err(Error::NotFound("Record not found".to_string()));
        }

        // Select the file
        self.db_file_select(location.file_nr)?;

        let file = self.file.as_mut().ok_or_else(|| Error::Other("No file open".to_string()))?;

        // Read size first
        file.seek(SeekFrom::Start(location.position as u64))?;
        let mut size_bytes = vec![0u8; 2];
        file.read_exact(&mut size_bytes)?;
        let size = u16::from(size_bytes[0]) | (u16::from(size_bytes[1]) << 8);

        // Write zeros for the entire record (header + data)
        let zeros = vec![0u8; HEADER_SIZE + size as usize];
        file.seek(SeekFrom::Start(location.position as u64))?;
        file.write_all(&zeros)?;

        // Clear lookup entry
        self.lookup.delete(id)?;

        Ok(())
    }

    /// Condenses the database by removing empty records and updating positions
    pub fn condense(&mut self) -> Result<(), Error> {
        // Create a temporary directory
        let temp_path = self.path.join("temp");
        fs::create_dir_all(&temp_path)?;

        // Get all file numbers
        let mut file_numbers = Vec::new();
        for entry in fs::read_dir(&self.path)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_file() && path.extension().map_or(false, |ext| ext == "db") {
                if let Some(stem) = path.file_stem() {
                    if let Ok(file_nr) = stem.to_string_lossy().parse::<u16>() {
                        file_numbers.push(file_nr);
                    }
                }
            }
        }

        // Process each file
        for file_nr in file_numbers {
            let src_path = self.path.join(format!("{}.db", file_nr));
            let temp_file_path = temp_path.join(format!("{}.db", file_nr));

            // Create new file
            let mut temp_file = File::create(&temp_file_path)?;
            temp_file.write_all(&[0u8])?; // Initialize with a byte

            // Open source file
            let mut src_file = File::open(&src_path)?;

            // Read the source file in chunks.
            // This is a simplified placeholder - a full implementation would also
            // need to handle records that span chunk boundaries.
            let mut buffer = vec![0u8; 1024];
            let mut _position = 0;

            loop {
                let bytes_read = src_file.read(&mut buffer)?;
                if bytes_read == 0 {
                    break;
                }

                _position += bytes_read;
            }

            // TODO: Implement proper record copying and position updating
            // This would involve:
            // 1. Reading each record from the source file
            // 2. If not deleted (all zeros), copy to temp file
            // 3. Update lookup table with new positions
        }

        // TODO: Replace original files with temp files

        // Clean up
        fs::remove_dir_all(&temp_path)?;

        Ok(())
    }
}

/// Calculates CRC32 for the data
fn calculate_crc(data: &[u8]) -> u32 {
    let mut hasher = Hasher::new();
    hasher.update(data);
    hasher.finalize()
}

#[cfg(test)]
mod tests {
    use std::env::temp_dir;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    use crate::{OurDB, OurDBConfig, OurDBSetArgs};

    fn get_temp_dir() -> PathBuf {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        temp_dir().join(format!("ourdb_backend_test_{}", timestamp))
    }

    #[test]
    fn test_backend_operations() {
        let temp_dir = get_temp_dir();

        let config = OurDBConfig {
            path: temp_dir.clone(),
            incremental_mode: false,
            file_size: None,
            keysize: None,
            reset: None, // Don't reset existing database
        };

        let mut db = OurDB::new(config).unwrap();

        // Test set and get
        let test_data = b"Test data for backend operations";
        let id = 1;

        db.set(OurDBSetArgs { id: Some(id), data: test_data }).unwrap();

        let retrieved = db.get(id).unwrap();
        assert_eq!(retrieved, test_data);

        // Clean up
        db.destroy().unwrap();
    }
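
    #[test]
    fn test_backend_rejects_oversized_data() {
        // Sketch of an additional check, assuming the public `set` call goes
        // through `set_` and therefore surfaces its size validation: payloads
        // larger than u16::MAX bytes cannot be described by the 2-byte size
        // field in the record header, so the write must fail.
        let temp_dir = std::env::temp_dir().join(format!(
            "ourdb_backend_test_oversized_{}",
            SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
        ));

        let config = OurDBConfig {
            path: temp_dir.clone(),
            incremental_mode: false,
            file_size: None,
            keysize: None,
            reset: None,
        };

        let mut db = OurDB::new(config).unwrap();

        // One byte more than the largest size the header can represent.
        let too_big = vec![0u8; u16::MAX as usize + 1];
        let result = db.set(OurDBSetArgs { id: Some(1), data: too_big.as_slice() });
        assert!(result.is_err());

        // Clean up
        db.destroy().unwrap();
    }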
}