move repos into monorepo

This commit is contained in:
Timur Gordon
2025-11-13 20:44:00 +01:00
commit 4b23e5eb7f
204 changed files with 33737 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
// Submodules of the retrieval layer.
pub mod query;
pub mod search;
// Re-export the main entry points so callers can import them from this module
// directly instead of reaching into the submodules.
pub use query::RetrievalQuery;
pub use search::SearchEngine;

View File

@@ -0,0 +1,74 @@
/// Parameters describing a single retrieval request.
///
/// Build one with [`RetrievalQuery::new`] and refine it with the chained
/// `with_*` methods, each of which consumes and returns the query.
#[derive(Clone, Debug)]
pub struct RetrievalQuery {
    /// Optional text query for keyword substring matching.
    pub text: Option<String>,
    /// Namespace to search in.
    pub ns: String,
    /// Field filters as `(key, value)` pairs, in the order they were added.
    pub filters: Vec<(String, String)>,
    /// Maximum number of results to return.
    pub top_k: usize,
}

impl RetrievalQuery {
    /// Create a query for namespace `ns` with no text, no filters and a
    /// result limit of 10.
    pub fn new(ns: String) -> Self {
        let filters = Vec::new();
        Self {
            ns,
            filters,
            text: None,
            top_k: 10,
        }
    }

    /// Return the query with its text set to `text`, replacing any previous one.
    pub fn with_text(self, text: String) -> Self {
        Self {
            text: Some(text),
            ..self
        }
    }

    /// Return the query with the `(key, value)` filter appended.
    pub fn with_filter(self, key: String, value: String) -> Self {
        let mut updated = self;
        let entry = (key, value);
        updated.filters.push(entry);
        updated
    }

    /// Return the query with its result limit set to `top_k`.
    pub fn with_top_k(self, top_k: usize) -> Self {
        Self { top_k, ..self }
    }
}
/// A single hit produced by a search.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct SearchResult {
    /// Object ID.
    pub id: String,
    /// Match score, intended to lie between 0.0 and 1.0 (not enforced by this type).
    pub score: f32,
    /// Matched text snippet, if any; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub snippet: Option<String>,
}

impl SearchResult {
    /// Build a result for `id` with the given `score` and no snippet.
    pub fn new(id: String, score: f32) -> Self {
        let snippet = None;
        Self { id, score, snippet }
    }

    /// Return the result with `snippet` attached, replacing any previous one.
    pub fn with_snippet(self, snippet: String) -> Self {
        Self {
            snippet: Some(snippet),
            ..self
        }
    }
}

View File

@@ -0,0 +1,150 @@
use crate::error::Result;
use crate::index::FieldIndex;
use crate::retrieve::query::{RetrievalQuery, SearchResult};
use crate::store::{HeroDbClient, OsirisObject};
/// Search engine for OSIRIS.
///
/// Resolves candidate object IDs through the field index and, when a text query
/// is present, ranks them with a simple case-insensitive substring match over
/// each object's title, text and tags.
pub struct SearchEngine {
    /// Client used to fetch full objects for text matching.
    client: HeroDbClient,
    /// Field index used to resolve candidate IDs.
    index: FieldIndex,
}

impl SearchEngine {
    /// Create a new search engine.
    ///
    /// The field index is constructed from a clone of `client`.
    pub fn new(client: HeroDbClient) -> Self {
        let index = FieldIndex::new(client.clone());
        Self { client, index }
    }

    /// Execute a search query.
    ///
    /// Candidates come from the field index: all IDs when `query.filters` is
    /// empty, otherwise the IDs selected by the filters. With a text query,
    /// every candidate is fetched and kept only if its text score is positive;
    /// without one, every candidate is returned with score `1.0`. Results are
    /// sorted by descending score and truncated to `query.top_k`.
    ///
    /// # Errors
    ///
    /// Returns an error if the index lookup fails.
    pub async fn search(&self, query: &RetrievalQuery) -> Result<Vec<SearchResult>> {
        // NOTE(review): `query.ns` is not consulted anywhere in this method;
        // presumably the namespace is bound to the client/index — confirm.

        // Step 1: Get candidate IDs from field filters
        let candidate_ids = if query.filters.is_empty() {
            self.index.get_all_ids().await?
        } else {
            self.index.get_ids_by_filters(&query.filters).await?
        };

        // Step 2: If text query is provided, filter by substring match
        let mut results = Vec::new();
        if let Some(text_query) = &query.text {
            // Lowercase once; the scoring and snippet helpers expect a
            // lowercased query.
            let text_query_lower = text_query.to_lowercase();
            for id in candidate_ids {
                // NOTE(review): candidates whose fetch fails are skipped
                // silently. This looks like deliberate best-effort behavior
                // (e.g. stale index entries) — confirm, and consider logging.
                if let Ok(obj) = self.client.get_object(&id).await {
                    let score = self.compute_text_score(&obj, &text_query_lower);
                    if score > 0.0 {
                        let snippet = self.extract_snippet(&obj, &text_query_lower);
                        results.push(SearchResult::new(id, score).with_snippet(snippet));
                    }
                }
            }
        } else {
            // No text query, return all candidates with score 1.0
            for id in candidate_ids {
                results.push(SearchResult::new(id, 1.0));
            }
        }

        // Step 3: Sort by score (descending) and limit. `total_cmp` is a total
        // order on floats, so the comparator has no panic path.
        results.sort_by(|a, b| b.score.total_cmp(&a.score));
        results.truncate(query.top_k);
        Ok(results)
    }

    /// Compute text match score (simple substring matching).
    ///
    /// `query` must already be lowercased. The score adds 0.5 for a title
    /// match, 0.5 for a text match plus 0.1 per additional occurrence in the
    /// text, and 0.2 per tag whose key or value matches; the total is capped
    /// at 1.0. A result of 0.0 means nothing matched.
    fn compute_text_score(&self, obj: &OsirisObject, query: &str) -> f32 {
        let mut score = 0.0;

        // Check title
        if let Some(title) = &obj.meta.title {
            if title.to_lowercase().contains(query) {
                score += 0.5;
            }
        }

        // Check text content
        if let Some(text) = &obj.text {
            // Lowercase once and reuse it for both the match test and the count.
            let text_lower = text.to_lowercase();
            if text_lower.contains(query) {
                score += 0.5;
                // Bonus for multiple occurrences
                let count = text_lower.matches(query).count();
                score += (count as f32 - 1.0) * 0.1;
            }
        }

        // Check tags
        for (key, value) in &obj.meta.tags {
            if key.to_lowercase().contains(query) || value.to_lowercase().contains(query) {
                score += 0.2;
            }
        }

        score.min(1.0)
    }

    /// Extract a snippet around the matched text.
    ///
    /// `query` must already be lowercased. When the object's text contains the
    /// query, the snippet is the match plus roughly `SNIPPET_LENGTH / 2` bytes
    /// of context on each side, with `...` marking truncated ends. Otherwise
    /// the title is returned, then the start of the text, then `"[No content]"`.
    ///
    /// All cuts are aligned to UTF-8 character boundaries, so non-ASCII text
    /// cannot cause a slicing panic.
    fn extract_snippet(&self, obj: &OsirisObject, query: &str) -> String {
        const SNIPPET_LENGTH: usize = 100;

        // Try to find snippet in text
        if let Some(text) = &obj.text {
            let text_lower = text.to_lowercase();
            if let Some(pos) = text_lower.find(query) {
                // `pos` indexes the lowercased copy, whose byte layout can
                // differ from the original; translate before slicing `text`.
                let match_start = lowercase_offset_to_original(text, pos);
                let match_end = lowercase_offset_to_original(text, pos + query.len());

                let start =
                    floor_char_boundary_of(text, match_start.saturating_sub(SNIPPET_LENGTH / 2));
                let end =
                    ceil_char_boundary_of(text, match_end.saturating_add(SNIPPET_LENGTH / 2));

                let mut snippet = String::with_capacity(end - start + 6);
                if start > 0 {
                    snippet.push_str("...");
                }
                snippet.push_str(&text[start..end]);
                if end < text.len() {
                    snippet.push_str("...");
                }
                return snippet;
            }
        }

        // Fallback to title or first N chars
        if let Some(title) = &obj.meta.title {
            return title.clone();
        }
        if let Some(text) = &obj.text {
            let end = floor_char_boundary_of(text, SNIPPET_LENGTH);
            let mut snippet = text[..end].to_string();
            if end < text.len() {
                snippet.push_str("...");
            }
            return snippet;
        }

        String::from("[No content]")
    }
}

/// Map a byte offset into `text.to_lowercase()` back to a byte offset into `text`.
///
/// Returns the start of the original character whose lowercase form covers
/// `lower_offset`, or `text.len()` when the offset is at or past the end. The
/// result is always a character boundary of `text`.
fn lowercase_offset_to_original(text: &str, lower_offset: usize) -> usize {
    let mut lowered_len = 0usize;
    for (orig_idx, ch) in text.char_indices() {
        lowered_len += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if lowered_len > lower_offset {
            return orig_idx;
        }
    }
    text.len()
}

/// Largest character boundary of `text` that is `<= idx` (clamped to `text.len()`).
fn floor_char_boundary_of(text: &str, idx: usize) -> usize {
    let mut idx = idx.min(text.len());
    // Offset 0 is always a boundary, so this terminates.
    while !text.is_char_boundary(idx) {
        idx -= 1;
    }
    idx
}

/// Smallest character boundary of `text` that is `>= idx` (clamped to `text.len()`).
fn ceil_char_boundary_of(text: &str, idx: usize) -> usize {
    let mut idx = idx.min(text.len());
    // `text.len()` is always a boundary, so this terminates.
    while !text.is_char_boundary(idx) {
        idx += 1;
    }
    idx
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a text search limited to 10 results never returns more than 10.
    ///
    /// Marked `#[ignore]`; it connects to `redis://localhost:6379`, so it
    /// presumably needs a live backend at that address — confirm before enabling.
    #[tokio::test]
    #[ignore]
    async fn test_search() {
        let engine = SearchEngine::new(HeroDbClient::new("redis://localhost:6379", 1).unwrap());

        let base_query = RetrievalQuery::new("test".to_string());
        let query = base_query.with_text("rust".to_string()).with_top_k(10);

        let hits = engine.search(&query).await.unwrap();
        assert!(hits.len() <= 10);
    }
}