feat: Add multi-vector API support

- Added a new `create_multi_vector` function to the Jina client
  to support creating multi-vector embeddings.
- Added a new `multi_vector_api.v` file containing the
  implementation for the multi-vector API.
- Updated the `jina.vsh` example to demonstrate the usage of the
  new multi-vector API.
This commit is contained in:
Mahmoud Emad
2025-03-12 16:16:37 +02:00
parent 5194fabe62
commit a7976c45f9
3 changed files with 115 additions and 1 deletions

View File

@@ -65,3 +65,20 @@ delete_result := jina_client.delete_classifier(classifier_id: classifiers[0].cla
panic('Error deleting classifier: ${err}')
}
println('Delete result: ${delete_result}')
// Create multi-vector embeddings for two texts: one marked as a document and
// one marked as a query. `embedding_type` requests the 'float' format.
// The trailing `!` propagates any error returned by `create_multi_vector`.
multi_vector := jina_client.create_multi_vector(
input: [
jina.MultiVectorTextDoc{
text: 'Hello world'
input_type: .document
},
jina.MultiVectorTextDoc{
text: "What's up?"
input_type: .query
},
]
embedding_type: ['float']
// dimensions: 96 (optional parameter; left unset in this example)
)!
// Print the value returned by the call.
println('Multi vector: ${multi_vector}')

View File

@@ -2,7 +2,6 @@ module jina
import freeflowuniverse.herolib.data.encoderhero
import freeflowuniverse.herolib.core.httpconnection
import net.http
import os
pub const version = '0.0.0'

View File

@@ -0,0 +1,98 @@
module jina
import json
import freeflowuniverse.herolib.core.httpconnection
// MultiVectorModel lists the Jina multi-vector models this client can request.
// `to_string` maps each variant to the model name string used in the request.
pub enum MultiVectorModel {
jina_colbert_v1_en // maps to 'jina-colbert-v1-en'
}
// to_string returns the model name string for this variant; it is the value
// placed in the `model` field of a multi-vector request.
pub fn (m MultiVectorModel) to_string() string {
	// The match is exhaustive, so adding a new variant without a name
	// here is a compile-time error rather than a silent fallback.
	name := match m {
		.jina_colbert_v1_en { 'jina-colbert-v1-en' }
	}
	return name
}
// MultiVectorInputType tells the API whether a text should be embedded
// as a document or as a query.
pub enum MultiVectorInputType {
document // the text is a document
query // the text is a query
}
// MultiVectorTextDoc is one text entry in the `input` list of a multi-vector request.
// Only `text` is mandatory; the optional fields may be left unset.
pub struct MultiVectorTextDoc {
pub mut:
id ?string // Optional: ID of the document
text string @[required] // Text of the document; must be provided when the struct is initialised
input_type ?MultiVectorInputType // Optional: whether this text is embedded as a query or as a document
}
// MultiVectorRequest is the JSON request body for the /v1/multi-vector endpoint.
// It is module-private; `create_multi_vector` builds it from MultiVectorParams
// and serialises it with `json.encode`.
struct MultiVectorRequest {
model string // Model name, as produced by MultiVectorModel.to_string()
input []MultiVectorTextDoc // Input documents
embedding_type ?[]string // Optional: Embedding type(s) to return
dimensions ?int // Optional: Number of dimensions
}
// MultiVectorResponse is the JSON response body of the /v1/multi-vector endpoint,
// as decoded by `create_multi_vector`.
//
// The fields are declared `pub` (read-only outside the module): V struct fields
// are private by default, which would leave callers in other modules unable to
// read anything from the response they receive.
pub struct MultiVectorResponse {
pub:
	data   []Embedding // List of embeddings
	usage  Usage       // Usage information
	model  string      // Model name
	object string      // Object type as string
}
// EmbeddingObjType is the object-shaped variant of SEmbeddingType: it can hold
// the embeddings under a `float`, `base64` or `binary` key.
// All three fields are optional.
pub struct EmbeddingObjType {
pub mut:
float ?[][]f64 // Optional 2D array of floats for multi-vector embeddings
base64 ?[]string // Optional array of base64 strings
binary ?[]u8 // Optional array of bytes
}
// SEmbeddingType is a sum type covering the different embedding formats:
// an EmbeddingObjType object, a flat list of floats, a list of strings,
// or a list of bytes.
// NOTE(review): confirm that `json.decode` picks the correct variant for the
// payloads the API actually returns.
pub type SEmbeddingType = EmbeddingObjType | []f64 | []string | []u8
// Embedding is one entry of the `data` list in a MultiVectorResponse.
//
// The fields are declared `pub` (read-only outside the module): V struct fields
// are private by default, which would prevent callers in other modules from
// reading the embedding they requested.
pub struct Embedding {
pub:
	index      int            // Index of the document
	embeddings SEmbeddingType // Embedding data, in one of the SEmbeddingType formats
	object     string         // Object type as string
}
// MultiVectorParams holds the caller-facing arguments of `create_multi_vector`.
// The `@[params]` attribute lets callers pass the fields as named arguments
// and omit the ones they do not need.
@[params]
pub struct MultiVectorParams {
pub mut:
model MultiVectorModel = .jina_colbert_v1_en // Model to use; defaults to jina-colbert-v1-en
input []MultiVectorTextDoc // Input documents
input_type ?MultiVectorInputType // Optional: query or document. NOTE(review): create_multi_vector does not copy this field into the request - confirm whether that is intended
embedding_type ?[]string // Optional: Embedding type(s) to return
dimensions ?int // Optional: Number of dimensions
}
// create_multi_vector posts the given documents to the `v1/multi-vector`
// endpoint and returns the decoded response.
//
// Parameters (fields of MultiVectorParams):
// - model: converted to its request string with `to_string`
// - input: forwarded unchanged
// - embedding_type, dimensions: optional, forwarded unchanged
//
// NOTE(review): `params.input_type` is not sent, because MultiVectorRequest has
// no such field; only the per-document `input_type` values inside `input` reach
// the API. Confirm this is intended.
//
// Returns an error if the HTTP client cannot be obtained, if the POST fails,
// or if the response body cannot be decoded into a MultiVectorResponse.
pub fn (mut j Jina) create_multi_vector(params MultiVectorParams) !MultiVectorResponse {
	payload := MultiVectorRequest{
		model:          params.model.to_string()
		input:          params.input
		embedding_type: params.embedding_type
		dimensions:     params.dimensions
	}

	req := httpconnection.Request{
		method:     .post
		prefix:     'v1/multi-vector'
		dataformat: .json
		data:       json.encode(payload)
	}

	mut httpclient := j.httpclient()!
	body := httpclient.post_json_str(req)!

	// The raw body is deliberately not printed: library code should not write
	// to stdout; callers can print the returned value if they want to see it.
	return json.decode(MultiVectorResponse, body)!
}