From 644946f1ca8d9c9e9c738339442bb917bb153c50 Mon Sep 17 00:00:00 2001 From: Maxime Van Hees Date: Mon, 29 Sep 2025 14:55:41 +0200 Subject: [PATCH] WIP3 implemeting lancedb --- Cargo.lock | 158 +++++++++++++++++ Cargo.toml | 1 + docs/lance.md | 444 +++++++++++++++++++++++++++++++++++++++++++++++ src/embedding.rs | 200 ++++++++++++++++++++- 4 files changed, 799 insertions(+), 4 deletions(-) create mode 100644 docs/lance.md diff --git a/Cargo.lock b/Cargo.lock index 82f88ec..866418f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2358,6 +2358,15 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -2535,6 +2544,21 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2902,6 +2926,7 @@ dependencies = [ "rand 0.8.5", "redb", "redis", + "reqwest", "secrecy", "serde", "serde_json", @@ -3109,6 +3134,22 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.7.0", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.16" @@ -3128,9 +3169,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2 0.6.0", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -4469,6 +4512,12 @@ dependencies = [ "libc", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4537,6 +4586,23 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" +[[package]] +name = "native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework 2.11.1", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nom" version = "7.1.3" @@ -4787,12 +4853,50 @@ dependencies = [ "uuid", ] +[[package]] +name = "openssl" +version = "0.10.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" +dependencies = [ + "bitflags 2.9.3", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + 
"quote", + "syn 2.0.106", +] + [[package]] name = "openssl-probe" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -5583,6 +5687,8 @@ checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ "base64 0.22.1", "bytes", + "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2 0.4.12", @@ -5591,9 +5697,12 @@ dependencies = [ "http-body-util", "hyper 1.7.0", "hyper-rustls 0.27.7", + "hyper-tls", "hyper-util", "js-sys", "log", + "mime", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", @@ -5605,6 +5714,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", + "tokio-native-tls", "tokio-rustls 0.26.2", "tokio-util", "tower", @@ -6485,6 +6595,27 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.9.3", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -6936,6 +7067,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -7256,6 +7397,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -7577,6 +7724,17 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-registry" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +dependencies = [ + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + [[package]] name = "windows-result" version = "0.3.4" diff --git a/Cargo.toml b/Cargo.toml index 27b587e..55b6531 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ lance-index = "0.37.0" arrow = "55.2.0" lancedb = "0.22.1" uuid = "1.18.1" +reqwest = { version = "0.12", features = ["blocking", "json", "rustls-tls"] } [dev-dependencies] redis = { version = "0.24", features = ["aio", "tokio-comp"] } diff --git a/docs/lance.md b/docs/lance.md new file mode 100644 index 0000000..2ea1b24 --- /dev/null +++ b/docs/lance.md @@ -0,0 +1,444 @@ +# Lance Vector Backend (RESP + JSON-RPC) + +This document explains how to use HeroDB’s Lance-backed vector store. It is text-first: users provide text, and HeroDB computes embeddings server-side (no manual vectors). 
It includes copy-pasteable RESP (redis-cli) and JSON-RPC examples for: + +- Creating a Lance database +- Embedding provider configuration (OpenAI, Azure OpenAI, or deterministic test provider) +- Dataset lifecycle: CREATE, LIST, INFO, DROP +- Ingestion: STORE text (+ optional metadata) +- Search: QUERY with K, optional FILTER and RETURN +- Delete by id +- Index creation (currently a placeholder/no-op) + +References: +- Implementation: [src/lance_store.rs](src/lance_store.rs), [src/cmd.rs](src/cmd.rs), [src/rpc.rs](src/rpc.rs), [src/server.rs](src/server.rs), [src/embedding.rs](src/embedding.rs) + +Notes: +- Admin DB 0 cannot be Lance (or Tantivy). Only databases with id >= 1 can use Lance. +- Permissions: + - Read operations (SEARCH, LIST, INFO) require read permission. + - Mutating operations (CREATE, STORE, CREATEINDEX, DEL, DROP, EMBEDDING CONFIG SET) require readwrite permission. +- Backend gating: + - If a DB is Lance, only LANCE.* and basic control commands (PING, ECHO, SELECT, INFO, CLIENT, etc.) are permitted. + - If a DB is not Lance, LANCE.* commands return an error. + +Storage layout and schema: +- Files live at: <base_dir>/lance/<db_id>/<dataset>.lance +- Records schema: + - id: Utf8 (non-null) + - vector: FixedSizeList<Float32, dim> (non-null) + - text: Utf8 (nullable) + - meta: Utf8 JSON (nullable) +- Search is an L2 KNN brute-force scan for now (lower score = better). Index creation is a no-op placeholder to be implemented later. + +Prerequisites: +- Start HeroDB with RPC enabled (for management calls): + - See [docs/basics.md](./basics.md) for flags. Example: + ```bash + ./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc + ``` + + +## 0) Create a Lance-backed database (JSON-RPC) + +Use the management API to create a database with backend "Lance". DB 0 is reserved for admin and cannot be Lance.
+ +Request: +```json +{ + "jsonrpc": "2.0", + "id": 1, + "method": "herodb_createDatabase", + "params": [ + "Lance", + { "name": "vectors-db", "storage_path": null, "max_size": null, "redis_version": null }, + null + ] +} +``` + +- Response contains the allocated db_id (>= 1). Use that id below (replace 1 with your actual id). + +Select the database over RESP: +```bash +redis-cli -p 6379 SELECT 1 +# → OK +``` + + +## 1) Configure embedding provider (server-side embeddings) + +HeroDB embeds text internally at STORE/SEARCH time using a per-dataset EmbeddingConfig sidecar. Configure provider before creating a dataset to choose dimensions and provider. + +Supported providers: +- openai (standard OpenAI or Azure OpenAI) +- testhash (deterministic, CI-friendly; no network) + +Environment variables for OpenAI: +- Standard OpenAI: export OPENAI_API_KEY=sk-... +- Azure OpenAI: export AZURE_OPENAI_API_KEY=... + +RESP examples: +```bash +# Standard OpenAI with default dims (model-dependent, e.g. 1536) +redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small + +# OpenAI with reduced output dimension (e.g., 512) when supported +redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small PARAM dim 512 + +# Azure OpenAI (set env: AZURE_OPENAI_API_KEY) +redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small \ + PARAM use_azure true \ + PARAM azure_endpoint https://myresource.openai.azure.com \ + PARAM azure_deployment my-embed-deploy \ + PARAM azure_api_version 2024-02-15 \ + PARAM dim 512 + +# Deterministic test provider (no network, stable vectors) +redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER testhash MODEL any +``` + +Read config: +```bash +redis-cli -p 6379 LANCE.EMBEDDING CONFIG GET myset +# → JSON blob describing provider/model/params +``` + +JSON-RPC examples: +```json +{ + "jsonrpc": "2.0", + "id": 2, + "method": 
"herodb_lanceSetEmbeddingConfig", + "params": [ + 1, + "myset", + "openai", + "text-embedding-3-small", + { "dim": "512" } + ] +} +``` + +```json +{ + "jsonrpc": "2.0", + "id": 3, + "method": "herodb_lanceGetEmbeddingConfig", + "params": [1, "myset"] +} +``` + + +## 2) Create a dataset + +Choose a dimension that matches your embedding configuration. For OpenAI text-embedding-3-small without dimension override, typical dimension is 1536; when `dim` is set (e.g., 512), use that. The current API requires an explicit DIM. + +RESP: +```bash +redis-cli -p 6379 LANCE.CREATE myset DIM 512 +# → OK +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 4, + "method": "herodb_lanceCreate", + "params": [1, "myset", 512] +} +``` + + +## 3) Store text documents (server-side embedding) + +Provide your id, the text to embed, and optional META fields. The server computes the embedding using the configured provider and stores id/vector/text/meta in the Lance dataset. Upserts by id are supported via delete-then-append semantics. + +RESP: +```bash +redis-cli -p 6379 LANCE.STORE myset ID doc-1 TEXT "Hello vector world" META title "Hello" category "demo" +# → OK +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 5, + "method": "herodb_lanceStoreText", + "params": [ + 1, + "myset", + "doc-1", + "Hello vector world", + { "title": "Hello", "category": "demo" } + ] +} +``` + + +## 4) Search with a text query + +Provide a query string; the server embeds it and performs KNN search. Optional: FILTER expression and RETURN subset of fields. 
+ +RESP: +```bash +# K nearest neighbors for the query text +redis-cli -p 6379 LANCE.SEARCH myset K 5 QUERY "greetings to vectors" +# → Array of hits: [id, score, [k,v, ...]] pairs, lower score = closer + +# With a filter on meta fields and return only title +redis-cli -p 6379 LANCE.SEARCH myset K 3 QUERY "greetings to vectors" FILTER "category = 'demo'" RETURN 1 title +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 6, + "method": "herodb_lanceSearchText", + "params": [1, "myset", "greetings to vectors", 5, null, null] +} +``` + +With filter and selected fields: +```json +{ + "jsonrpc": "2.0", + "id": 7, + "method": "herodb_lanceSearchText", + "params": [1, "myset", "greetings to vectors", 3, "category = 'demo'", ["title"]] +} +``` + +Response shape: +- RESP over redis-cli: an array of hits [id, score, [k, v, ...]]. +- JSON-RPC returns an object containing the RESP-encoded wire format string or a structured result depending on implementation. See [src/rpc.rs](src/rpc.rs) for details. + + +## 5) Create an index (placeholder) + +Index creation currently returns OK but is a no-op. It will integrate Lance vector indices in a future update. 
+ +RESP: +```bash +redis-cli -p 6379 LANCE.CREATEINDEX myset TYPE "ivf_pq" PARAM nlist 100 PARAM pq_m 16 +# → OK (no-op for now) +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 8, + "method": "herodb_lanceCreateIndex", + "params": [1, "myset", "ivf_pq", { "nlist": "100", "pq_m": "16" }] +} +``` + + +## 6) Inspect datasets + +RESP: +```bash +# List datasets in current Lance DB +redis-cli -p 6379 LANCE.LIST + +# Get dataset info +redis-cli -p 6379 LANCE.INFO myset +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 9, + "method": "herodb_lanceList", + "params": [1] +} +``` + +```json +{ + "jsonrpc": "2.0", + "id": 10, + "method": "herodb_lanceInfo", + "params": [1, "myset"] +} +``` + + +## 7) Delete and drop + +RESP: +```bash +# Delete by id +redis-cli -p 6379 LANCE.DEL myset doc-1 +# → OK + +# Drop the entire dataset +redis-cli -p 6379 LANCE.DROP myset +# → OK +``` + +JSON-RPC: +```json +{ + "jsonrpc": "2.0", + "id": 11, + "method": "herodb_lanceDel", + "params": [1, "myset", "doc-1"] +} +``` + +```json +{ + "jsonrpc": "2.0", + "id": 12, + "method": "herodb_lanceDrop", + "params": [1, "myset"] +} +``` + + +## 8) End-to-end example (RESP) + +```bash +# 1. Select Lance DB (assume db_id=1 created via RPC) +redis-cli -p 6379 SELECT 1 + +# 2. Configure embedding provider (OpenAI small model at 512 dims) +redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small PARAM dim 512 + +# 3. Create dataset +redis-cli -p 6379 LANCE.CREATE myset DIM 512 + +# 4. Store documents +redis-cli -p 6379 LANCE.STORE myset ID doc-1 TEXT "The quick brown fox jumps over the lazy dog" META title "Fox" category "animal" +redis-cli -p 6379 LANCE.STORE myset ID doc-2 TEXT "A fast auburn fox vaulted a sleepy canine" META title "Fox paraphrase" category "animal" + +# 5. Search +redis-cli -p 6379 LANCE.SEARCH myset K 2 QUERY "quick brown fox" RETURN 1 title + +# 6. 
Dataset info and listing +redis-cli -p 6379 LANCE.INFO myset +redis-cli -p 6379 LANCE.LIST + +# 7. Delete and drop +redis-cli -p 6379 LANCE.DEL myset doc-2 +redis-cli -p 6379 LANCE.DROP myset +``` + + +## 9) End-to-end example (JSON-RPC) + +Assume RPC server on port 8080. Replace ids and ports as needed. + +1) Create Lance DB: +```json +{ + "jsonrpc": "2.0", + "id": 100, + "method": "herodb_createDatabase", + "params": ["Lance", { "name": "vectors-db", "storage_path": null, "max_size": null, "redis_version": null }, null] +} +``` + +2) Set embedding config: +```json +{ + "jsonrpc": "2.0", + "id": 101, + "method": "herodb_lanceSetEmbeddingConfig", + "params": [1, "myset", "openai", "text-embedding-3-small", { "dim": "512" }] +} +``` + +3) Create dataset: +```json +{ + "jsonrpc": "2.0", + "id": 102, + "method": "herodb_lanceCreate", + "params": [1, "myset", 512] +} +``` + +4) Store text: +```json +{ + "jsonrpc": "2.0", + "id": 103, + "method": "herodb_lanceStoreText", + "params": [1, "myset", "doc-1", "The quick brown fox jumps over the lazy dog", { "title": "Fox", "category": "animal" }] +} +``` + +5) Search text: +```json +{ + "jsonrpc": "2.0", + "id": 104, + "method": "herodb_lanceSearchText", + "params": [1, "myset", "quick brown fox", 2, null, ["title"]] +} +``` + +6) Info/list: +```json +{ + "jsonrpc": "2.0", + "id": 105, + "method": "herodb_lanceInfo", + "params": [1, "myset"] +} +``` + +```json +{ + "jsonrpc": "2.0", + "id": 106, + "method": "herodb_lanceList", + "params": [1] +} +``` + +7) Delete/drop: +```json +{ + "jsonrpc": "2.0", + "id": 107, + "method": "herodb_lanceDel", + "params": [1, "myset", "doc-1"] +} +``` + +```json +{ + "jsonrpc": "2.0", + "id": 108, + "method": "herodb_lanceDrop", + "params": [1, "myset"] +} +``` + + +## 10) Operational notes and troubleshooting + +- If using OpenAI and you see “Missing API key in env '...'”, set the environment variable named in the message: + - Standard: `export OPENAI_API_KEY=sk-...` + - Azure: `export AZURE_OPENAI_API_KEY=...` and pass `use_azure true`,
`azure_endpoint`, `azure_deployment`, `azure_api_version`. +- Dimensions mismatch: + - Ensure the dataset DIM equals the provider’s embedding dim. For OpenAI text-embedding-3 models, set `PARAM dim 512` (or another supported size) and use that same DIM for `LANCE.CREATE`. +- DB 0 restriction: + - Lance is not allowed on DB 0. Use db_id >= 1. +- Permissions: + - Read operations (SEARCH, LIST, INFO) require read permission. + - Mutations (CREATE, STORE, CREATEINDEX, DEL, DROP, EMBEDDING CONFIG SET) require readwrite permission. +- Backend gating: + - On Lance DBs, only LANCE.* commands are accepted (plus basic control). +- Current index behavior: + - `LANCE.CREATEINDEX` returns OK but is a no-op. Future versions will integrate Lance vector indices. +- Implementation files for reference: + - [src/lance_store.rs](src/lance_store.rs), [src/cmd.rs](src/cmd.rs), [src/rpc.rs](src/rpc.rs), [src/server.rs](src/server.rs), [src/embedding.rs](src/embedding.rs) \ No newline at end of file diff --git a/src/embedding.rs b/src/embedding.rs index db982b9..0c742d4 100644 --- a/src/embedding.rs +++ b/src/embedding.rs @@ -21,6 +21,12 @@ use serde::{Deserialize, Serialize}; use crate::error::DBError; +// Networking for OpenAI/Azure +use std::time::Duration; +use reqwest::blocking::Client; +use reqwest::header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE, AUTHORIZATION}; +use serde_json::json; + /// Provider identifiers. Extend as needed to mirror LanceDB-supported providers. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")]
@@ -122,17 +128,241 @@ impl Embedder for TestHashEmbedder {
     }
 }
 
+/// OpenAI embedder (supports OpenAI and Azure OpenAI via REST).
+///
+/// NOTE(review): built on `reqwest::blocking`, which must not be executed inside an
+/// async runtime (it panics when it tries to block); confirm callers use a blocking thread.
+struct OpenAIEmbedder {
+    /// Model name; sent in the request body for standard OpenAI and reported by `name()`.
+    model: String,
+    /// Expected length of each returned vector; also sent as `dimensions` when non-zero.
+    dim: usize,
+    client: Client,
+    /// Full URL the embeddings request is POSTed to.
+    endpoint: String,
+    auth_header_name: HeaderName,
+    auth_header_value: HeaderValue,
+    use_azure: bool,
+}
+
+impl OpenAIEmbedder {
+    /// Builds an embedder from `cfg`, reading the API key from the environment.
+    ///
+    /// Params read: `use_azure`, `api_key_env`, `azure_endpoint`, `azure_deployment`,
+    /// `azure_api_version`, `base_url`, and `dim` / `dimensions`.
+    ///
+    /// # Errors
+    /// Returns a `DBError` when the API key variable is unset, `azure_endpoint` is missing
+    /// in Azure mode, the key is not a valid header value, or the HTTP client cannot be built.
+    fn new_from_config(cfg: &EmbeddingConfig) -> Result<Self, DBError> {
+        // Whether to use Azure OpenAI
+        let use_azure = cfg
+            .get_param_string("use_azure")
+            .map(|s| s.eq_ignore_ascii_case("true"))
+            .unwrap_or(false);
+
+        // Resolve API key (OPENAI_API_KEY or AZURE_OPENAI_API_KEY by default)
+        let api_key_env = cfg
+            .get_param_string("api_key_env")
+            .unwrap_or_else(|| {
+                if use_azure {
+                    "AZURE_OPENAI_API_KEY".to_string()
+                } else {
+                    "OPENAI_API_KEY".to_string()
+                }
+            });
+        let api_key = std::env::var(&api_key_env)
+            .map_err(|_| DBError(format!("Missing API key in env '{}'", api_key_env)))?;
+
+        // Resolve endpoint
+        // - Standard OpenAI: https://api.openai.com/v1/embeddings (default) or params["base_url"]
+        //   (despite its name, "base_url" is used verbatim as the full endpoint URL)
+        // - Azure OpenAI: {azure_endpoint}/openai/deployments/{deployment}/embeddings?api-version=...
+        let endpoint = if use_azure {
+            let base = cfg
+                .get_param_string("azure_endpoint")
+                .ok_or_else(|| DBError("Missing 'azure_endpoint' for Azure OpenAI".into()))?;
+            let deployment = cfg
+                .get_param_string("azure_deployment")
+                .unwrap_or_else(|| cfg.model.clone());
+            let api_version = cfg
+                .get_param_string("azure_api_version")
+                .unwrap_or_else(|| "2023-05-15".to_string());
+            format!(
+                "{}/openai/deployments/{}/embeddings?api-version={}",
+                base.trim_end_matches('/'),
+                deployment,
+                api_version
+            )
+        } else {
+            cfg.get_param_string("base_url")
+                .unwrap_or_else(|| "https://api.openai.com/v1/embeddings".to_string())
+        };
+
+        // Determine expected dimension:
+        // - Prefer params["dim"] or params["dimensions"]
+        // - Else default to 1536 (common for text-embedding-3-small; callers should override if needed)
+        let dim = cfg
+            .get_param_usize("dim")
+            .or_else(|| cfg.get_param_usize("dimensions"))
+            .unwrap_or(1536);
+
+        // Build default headers
+        let mut headers = HeaderMap::new();
+        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+        let (auth_name, mut auth_val) = if use_azure {
+            let name = HeaderName::from_static("api-key");
+            let val = HeaderValue::from_str(&api_key)
+                .map_err(|_| DBError("Invalid API key header value".into()))?;
+            (name, val)
+        } else {
+            let bearer = format!("Bearer {}", api_key);
+            let val = HeaderValue::from_str(&bearer)
+                .map_err(|_| DBError("Invalid Authorization header".into()))?;
+            (AUTHORIZATION, val)
+        };
+        // Mark the credential sensitive so it is redacted from `Debug` output.
+        auth_val.set_sensitive(true);
+
+        let client = Client::builder()
+            .timeout(Duration::from_secs(30))
+            .default_headers(headers)
+            .build()
+            .map_err(|e| DBError(format!("Failed to build HTTP client: {}", e)))?;
+
+        Ok(Self {
+            model: cfg.model.clone(),
+            dim,
+            client,
+            endpoint,
+            auth_header_name: auth_name,
+            auth_header_value: auth_val,
+            use_azure,
+        })
+    }
+
+    /// Embeds `inputs` with one API call and returns one vector per input.
+    ///
+    /// # Errors
+    /// Returns a `DBError` on transport failure, a non-success HTTP status, a malformed
+    /// response, a vector whose length differs from `dim`, or a vector count that does
+    /// not match `inputs.len()`.
+    fn request_many(&self, inputs: &[String]) -> Result<Vec<Vec<f32>>, DBError> {
+        // Compose request body:
+        // - Standard OpenAI: { "model": ..., "input": [...], "dimensions": dim? }
+        // - Azure: { "input": [...], "dimensions": dim? } (model from deployment)
+        let mut body = if self.use_azure {
+            json!({ "input": inputs })
+        } else {
+            json!({ "model": self.model, "input": inputs })
+        };
+        // NOTE(review): `dimensions` is sent even when `dim` is only the 1536 default;
+        // OpenAI documents the field for text-embedding-3 and later models; confirm
+        // older models are not expected to work here.
+        if self.dim > 0 {
+            // `body` is always a JSON object here, so index assignment inserts the key.
+            body["dimensions"] = json!(self.dim);
+        }
+
+        let resp = self
+            .client
+            .post(&self.endpoint)
+            .header(self.auth_header_name.clone(), self.auth_header_value.clone())
+            .json(&body)
+            .send()
+            .map_err(|e| DBError(format!("HTTP request failed: {}", e)))?;
+        if !resp.status().is_success() {
+            let code = resp.status();
+            let text = resp.text().unwrap_or_default();
+            return Err(DBError(format!("Embeddings API error {}: {}", code, text)));
+        }
+        let val: serde_json::Value = resp
+            .json()
+            .map_err(|e| DBError(format!("Invalid JSON from embeddings API: {}", e)))?;
+
+        let data = val
+            .get("data")
+            .and_then(|d| d.as_array())
+            .ok_or_else(|| DBError("Embeddings API response missing 'data' array".into()))?;
+        // A short or long reply would silently misalign vectors with their inputs.
+        if data.len() != inputs.len() {
+            return Err(DBError(format!(
+                "Embeddings API returned {} embeddings for {} inputs",
+                data.len(),
+                inputs.len()
+            )));
+        }
+
+        let mut out: Vec<Vec<f32>> = Vec::with_capacity(data.len());
+        for item in data {
+            let emb = item
+                .get("embedding")
+                .and_then(|e| e.as_array())
+                .ok_or_else(|| DBError("Embeddings API item missing 'embedding'".into()))?;
+            let v = emb
+                .iter()
+                .map(|n| {
+                    n.as_f64()
+                        .map(|f| f as f32)
+                        .ok_or_else(|| DBError("Embedding element is not a number".into()))
+                })
+                .collect::<Result<Vec<f32>, DBError>>()?;
+            if self.dim > 0 && v.len() != self.dim {
+                return Err(DBError(format!(
+                    "Embedding dimension mismatch: expected {}, got {}. Configure 'dim' or 'dimensions' to match output.",
+                    self.dim, v.len()
+                )));
+            }
+            out.push(v);
+        }
+        Ok(out)
+    }
+}
+
+impl Embedder for OpenAIEmbedder {
+    fn name(&self) -> String {
+        if self.use_azure {
+            format!("azure-openai:{}", self.model)
+        } else {
+            format!("openai:{}", self.model)
+        }
+    }
+
+    fn dim(&self) -> usize {
+        self.dim
+    }
+
+    fn embed(&self, text: &str) -> Result<Vec<f32>, DBError> {
+        // Surface an empty reply as an error instead of storing an all-zero vector.
+        self.request_many(&[text.to_string()])?
+            .into_iter()
+            .next()
+            .ok_or_else(|| DBError("Embeddings API returned no embedding".into()))
+    }
+
+    fn embed_many(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, DBError> {
+        if texts.is_empty() {
+            return Ok(vec![]);
+        }
+        self.request_many(texts)
+    }
+}
+
 /// Create an embedder instance from a config.
 /// - TestHash: uses params["dim"] or defaults to 64
-/// - Lance* providers: return an explicit error for now; implementers can wire these up
+/// - LanceOpenAI: uses OpenAI (or Azure OpenAI) embeddings REST API
+/// - Other Lance providers can be added similarly
 pub fn create_embedder(config: &EmbeddingConfig) -> Result<Arc<dyn Embedder>, DBError> {
     match &config.provider {
         EmbeddingProvider::TestHash => {
             let dim = config.get_param_usize("dim").unwrap_or(64);
             Ok(Arc::new(TestHashEmbedder::new(dim, config.model.clone())))
         }
-        EmbeddingProvider::LanceFastEmbed => Err(DBError("LanceFastEmbed provider not yet implemented in Rust embedding layer; configure 'test-hash' or implement a Lance-backed provider".into())),
-        EmbeddingProvider::LanceOpenAI => Err(DBError("LanceOpenAI provider not yet implemented in Rust embedding layer; configure 'test-hash' or implement a Lance-backed provider".into())),
-        EmbeddingProvider::LanceOther(p) => Err(DBError(format!("Lance provider '{}' not implemented; configure 'test-hash' or implement a Lance-backed provider", p))),
+        EmbeddingProvider::LanceOpenAI => {
+            let inner = OpenAIEmbedder::new_from_config(config)?;
+            Ok(Arc::new(inner))
+        }
+        EmbeddingProvider::LanceFastEmbed => Err(DBError("LanceFastEmbed provider not yet implemented in Rust embedding layer; configure 'test-hash' or use 'openai'".into())),
+        EmbeddingProvider::LanceOther(p) => Err(DBError(format!("Lance provider '{}' not implemented; configure 'openai' or 'test-hash'", p))),
     }
 }
\ No newline at end of file