feat: Enhance Jina client with additional embedding parameters

- Add `type_`, `truncate`, and `late_chunking` parameters to the
  `create_embeddings` function for finer control over embedding
  generation. This allows users to specify embedding type,
  truncation method, and whether to apply late chunking.
- Rename the model type from `JinaModelEnumerator` to `JinaModel` (the
  `model` parameter keeps its name) for clarity and consistency.
- Improve model enum naming for better readability and API consistency.
- Add unit tests for the `create_embeddings` function to ensure
  correct functionality and handle potential errors.
This commit is contained in:
Mahmoud Emad
2025-03-11 17:45:55 +02:00
parent b006bb1e41
commit 7965883744
4 changed files with 69 additions and 66 deletions

View File

@@ -6,7 +6,7 @@ mut jina_client := jina.get()!
embeddings := jina_client.create_embeddings( embeddings := jina_client.create_embeddings(
input: ['Hello', 'World'] input: ['Hello', 'World']
model: .embeddings_v3 model: .jina_embeddings_v3
task: 'separation' task: 'separation'
) or { panic('Error while creating embeddings: ${err}') } ) or { panic('Error while creating embeddings: ${err}') }

View File

@@ -1,13 +1,18 @@
module jina module jina
import freeflowuniverse.herolib.core.httpconnection import freeflowuniverse.herolib.core.httpconnection
import os
import json
@[params] @[params]
pub struct CreateEmbeddingParams { pub struct CreateEmbeddingParams {
pub mut: pub mut:
input []string @[required] // Input texts input []string @[required] // Input texts
model JinaModelEnumerator @[required] // Model name model JinaModel @[required] // Model name
task string @[required] // Task type task string @[required] // Task type
type_ ?EmbeddingType // embedding type
truncate ?TruncateType // truncation type
late_chunking ?bool // Flag to determine if late chunking is applied
} }
// Create embeddings for input texts // Create embeddings for input texts
@@ -20,6 +25,16 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
task: task task: task
} }
// Forward the optional embedding type only when the caller supplied one.
if embedding_type := params.type_ {
	embedding_input.type_ = embedding_type
}
// Forward the optional truncation mode only when the caller supplied one.
if truncate_type := params.truncate {
	embedding_input.truncate = truncate_type
}
// Use the caller's actual value and fall back to `false` when the option is
// unset. Checking only for presence would turn late chunking on even when the
// caller explicitly passed `late_chunking: false`.
embedding_input.late_chunking = params.late_chunking or { false }
req := httpconnection.Request{ req := httpconnection.Request{
method: .post method: .post
prefix: 'v1/embeddings' prefix: 'v1/embeddings'
@@ -32,37 +47,6 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
return parse_model_embedding_output(response)! return parse_model_embedding_output(response)!
} }
// pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse {
// // Read the file content
// file_content := os.read_file(file_path) or {
// return error('Failed to read file: ${err}')
// }
// // Create a multipart form
// mut form := http.FormData{}
// form.add_field('file', file_content, 'input.csv', 'text/csv')
// form.add_field('model', model)
// form.add_field('email', email)
// // Create a custom HTTP request
// mut req := http.new_request(.post, '${j.base_url}/v1/bulk-embeddings', '')!
// req.header = j.http.default_header // Add Authorization header
// req.set_form_data(form) // Set multipart form data
// // Send the request
// response := req.do() or {
// return error('Failed to send bulk embedding request: ${err}')
// }
// // Check for errors
// if response.status_code != 200 {
// return error('Bulk embedding request failed with status ${response.status_code}: ${response.body}')
// }
// // Parse the JSON response
// return json.decode(BulkEmbeddingJobResponse, response.body)!
// }
// // Create embeddings with a TextDoc input // // Create embeddings with a TextDoc input
// pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput { // pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {

View File

@@ -0,0 +1,19 @@
module jina
// setup_client returns the Jina client obtained from `get()`, propagating
// any error that `get()` reports.
fn setup_client() !&Jina {
	return get()!
}
// test_create_embeddings requests embeddings for two short inputs using the
// `jina-embeddings-v3` model and checks the shape of the response.
fn test_create_embeddings() {
	mut jina_client := setup_client()!

	// Panic with the underlying error if the request fails.
	result := jina_client.create_embeddings(
		input: ['Hello', 'World']
		model: .jina_embeddings_v3
		task:  'separation'
	) or { panic('Error while creating embeddings: ${err}') }

	// The response must contain at least one embedding entry.
	assert result.data.len > 0
	// The response object type must be 'list'.
	assert result.object == 'list'
	// The response must echo the API name of the requested model.
	assert result.model == 'jina-embeddings-v3'
}

View File

@@ -2,44 +2,44 @@ module jina
import json import json
// JinaModelEnumerator represents the available models for Jina API // JinaModel represents the available Jina models
pub enum JinaModelEnumerator { pub enum JinaModel {
clip_v1 // jina-clip-v1, 223M, 768 jina_clip_v1
clip_v2 // jina-clip-v2, 865M, 1024 jina_clip_v2
embeddings_v2_base_en // jina-embeddings-v2-base-en, 137M, 768 jina_embeddings_v2_base_en
embeddings_v2_base_es // jina-embeddings-v2-base-es, 161M, 768 jina_embeddings_v2_base_es
embeddings_v2_base_de // jina-embeddings-v2-base-de, 161M, 768 jina_embeddings_v2_base_de
embeddings_v2_base_zh // jina-embeddings-v2-base-zh, 161M, 768 jina_embeddings_v2_base_zh
embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768 jina_embeddings_v2_base_code
embeddings_v3 // jina-embeddings-v3, 570M, 1024 jina_embeddings_v3
} }
// to_string converts JinaModelEnumerator enum to its string representation // to_string converts a JinaModel enum to its string representation as expected by the Jina API
pub fn (m JinaModelEnumerator) to_string() string { pub fn (m JinaModel) to_string() string {
return match m { return match m {
.clip_v1 { 'jina-clip-v1' } .jina_clip_v1 { 'jina-clip-v1' }
.clip_v2 { 'jina-clip-v2' } .jina_clip_v2 { 'jina-clip-v2' }
.embeddings_v2_base_en { 'jina-embeddings-v2-base-en' } .jina_embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
.embeddings_v2_base_es { 'jina-embeddings-v2-base-es' } .jina_embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
.embeddings_v2_base_de { 'jina-embeddings-v2-base-de' } .jina_embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
.embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' } .jina_embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
.embeddings_v2_base_code { 'jina-embeddings-v2-base-code' } .jina_embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
.embeddings_v3 { 'jina-embeddings-v3' } .jina_embeddings_v3 { 'jina-embeddings-v3' }
} }
} }
// from_string converts string to JinaModelEnumerator enum // from_string converts a string to a JinaModel enum, returning an error if the string is invalid
pub fn jina_model_from_string(s string) !JinaModelEnumerator { pub fn jina_model_from_string(s string) !JinaModel {
return match s { return match s {
'jina-clip-v1' { JinaModelEnumerator.clip_v1 } 'jina-clip-v1' { JinaModel.jina_clip_v1 }
'jina-clip-v2' { JinaModelEnumerator.clip_v2 } 'jina-clip-v2' { JinaModel.jina_clip_v2 }
'jina-embeddings-v2-base-en' { JinaModelEnumerator.embeddings_v2_base_en } 'jina-embeddings-v2-base-en' { JinaModel.jina_embeddings_v2_base_en }
'jina-embeddings-v2-base-es' { JinaModelEnumerator.embeddings_v2_base_es } 'jina-embeddings-v2-base-es' { JinaModel.jina_embeddings_v2_base_es }
'jina-embeddings-v2-base-de' { JinaModelEnumerator.embeddings_v2_base_de } 'jina-embeddings-v2-base-de' { JinaModel.jina_embeddings_v2_base_de }
'jina-embeddings-v2-base-zh' { JinaModelEnumerator.embeddings_v2_base_zh } 'jina-embeddings-v2-base-zh' { JinaModel.jina_embeddings_v2_base_zh }
'jina-embeddings-v2-base-code' { JinaModelEnumerator.embeddings_v2_base_code } 'jina-embeddings-v2-base-code' { JinaModel.jina_embeddings_v2_base_code }
'jina-embeddings-v3' { JinaModelEnumerator.embeddings_v3 } 'jina-embeddings-v3' { JinaModel.jina_embeddings_v3 }
else { error('Invalid model string: ${s}') } else { error('Invalid Jina model string: ${s}') }
} }
} }