Fixed a few bugs related to vector embedding; added additional end-to-end documentation that showcases local and external embedders step by step; added an example mock embedder Python script
This commit is contained in:
46
src/rpc.rs
46
src/rpc.rs
@@ -9,7 +9,7 @@ use sha2::{Digest, Sha256};
|
||||
use crate::server::Server;
|
||||
use crate::options::DBOption;
|
||||
use crate::admin_meta;
|
||||
use crate::embedding::{EmbeddingConfig, EmbeddingProvider};
|
||||
use crate::embedding::EmbeddingConfig;
|
||||
use base64::{engine::general_purpose, Engine as _};
|
||||
|
||||
/// Database backend types
|
||||
@@ -248,9 +248,7 @@ pub trait Rpc {
|
||||
&self,
|
||||
db_id: u64,
|
||||
name: String,
|
||||
provider: String,
|
||||
model: String,
|
||||
params: Option<HashMap<String, String>>,
|
||||
config: EmbeddingConfig,
|
||||
) -> RpcResult<bool>;
|
||||
|
||||
/// Get per-dataset embedding configuration
|
||||
@@ -1008,9 +1006,7 @@ impl RpcServer for RpcServerImpl {
|
||||
&self,
|
||||
db_id: u64,
|
||||
name: String,
|
||||
provider: String,
|
||||
model: String,
|
||||
params: Option<HashMap<String, String>>,
|
||||
config: EmbeddingConfig,
|
||||
) -> RpcResult<bool> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
@@ -1022,19 +1018,17 @@ impl RpcServer for RpcServerImpl {
|
||||
if !server.has_write_permission() {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "write permission denied", None::<()>));
|
||||
}
|
||||
let prov = match provider.to_lowercase().as_str() {
|
||||
"test-hash" | "testhash" => EmbeddingProvider::TestHash,
|
||||
"testimagehash" | "image-test-hash" | "imagetesthash" => EmbeddingProvider::ImageTestHash,
|
||||
"fastembed" | "lancefastembed" => EmbeddingProvider::LanceFastEmbed,
|
||||
"openai" | "lanceopenai" => EmbeddingProvider::LanceOpenAI,
|
||||
other => EmbeddingProvider::LanceOther(other.to_string()),
|
||||
};
|
||||
let cfg = EmbeddingConfig {
|
||||
provider: prov,
|
||||
model,
|
||||
params: params.unwrap_or_default(),
|
||||
};
|
||||
server.set_dataset_embedding_config(&name, &cfg)
|
||||
// Validate provider and dimension (only a minimal set is allowed for now)
|
||||
match config.provider {
|
||||
crate::embedding::EmbeddingProvider::openai
|
||||
| crate::embedding::EmbeddingProvider::test
|
||||
| crate::embedding::EmbeddingProvider::image_test => {}
|
||||
}
|
||||
if config.dim == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "Invalid embedding config: dim must be > 0", None::<()>));
|
||||
}
|
||||
|
||||
server.set_dataset_embedding_config(&name, &config)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(true)
|
||||
}
|
||||
@@ -1056,17 +1050,7 @@ impl RpcServer for RpcServerImpl {
|
||||
}
|
||||
let cfg = server.get_dataset_embedding_config(&name)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(serde_json::json!({
|
||||
"provider": match cfg.provider {
|
||||
EmbeddingProvider::TestHash => "test-hash",
|
||||
EmbeddingProvider::ImageTestHash => "testimagehash",
|
||||
EmbeddingProvider::LanceFastEmbed => "lancefastembed",
|
||||
EmbeddingProvider::LanceOpenAI => "lanceopenai",
|
||||
EmbeddingProvider::LanceOther(ref s) => s,
|
||||
},
|
||||
"model": cfg.model,
|
||||
"params": cfg.params
|
||||
}))
|
||||
Ok(serde_json::to_value(&cfg).unwrap_or(serde_json::json!({})))
|
||||
}
|
||||
|
||||
async fn lance_store_text(
|
||||
|
||||
Reference in New Issue
Block a user