feat: Enhance Jina client with additional embedding parameters
- Add `type_`, `truncate`, and `late_chunking` parameters to the `create_embeddings` function for finer control over embedding generation. This allows users to specify the embedding type, the truncation method, and whether to apply late chunking.
- Rename model parameter to `model` for clarity and consistency.
- Improve model enum naming for better readability and API consistency.
- Add unit tests for the `create_embeddings` function to ensure correct functionality and handle potential errors.
This commit is contained in:
@@ -6,7 +6,7 @@ mut jina_client := jina.get()!
|
||||
|
||||
embeddings := jina_client.create_embeddings(
|
||||
input: ['Hello', 'World']
|
||||
model: .embeddings_v3
|
||||
model: .jina_embeddings_v3
|
||||
task: 'separation'
|
||||
) or { panic('Error while creating embeddings: ${err}') }
|
||||
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
module jina
|
||||
|
||||
import freeflowuniverse.herolib.core.httpconnection
|
||||
import os
|
||||
import json
|
||||
|
||||
@[params]
|
||||
pub struct CreateEmbeddingParams {
|
||||
pub mut:
|
||||
input []string @[required] // Input texts
|
||||
model JinaModelEnumerator @[required] // Model name
|
||||
task string @[required] // Task type
|
||||
input []string @[required] // Input texts
|
||||
model JinaModel @[required] // Model name
|
||||
task string @[required] // Task type
|
||||
type_ ?EmbeddingType // embedding type
|
||||
truncate ?TruncateType // truncation type
|
||||
late_chunking ?bool // Flag to determine if late chunking is applied
|
||||
}
|
||||
|
||||
// Create embeddings for input texts
|
||||
@@ -20,6 +25,16 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
|
||||
task: task
|
||||
}
|
||||
|
||||
if v := params.type_ {
|
||||
embedding_input.type_ = v
|
||||
}
|
||||
|
||||
if v := params.truncate {
|
||||
embedding_input.truncate = v
|
||||
}
|
||||
|
||||
// Forward the caller's value when late_chunking is set; fall back to false when it is unset.
// Testing only for presence would turn an explicit `late_chunking: false` into true.
embedding_input.late_chunking = if v := params.late_chunking { v } else { false }
|
||||
|
||||
req := httpconnection.Request{
|
||||
method: .post
|
||||
prefix: 'v1/embeddings'
|
||||
@@ -32,37 +47,6 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
|
||||
return parse_model_embedding_output(response)!
|
||||
}
|
||||
|
||||
// pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse {
|
||||
// // Read the file content
|
||||
// file_content := os.read_file(file_path) or {
|
||||
// return error('Failed to read file: ${err}')
|
||||
// }
|
||||
|
||||
// // Create a multipart form
|
||||
// mut form := http.FormData{}
|
||||
// form.add_field('file', file_content, 'input.csv', 'text/csv')
|
||||
// form.add_field('model', model)
|
||||
// form.add_field('email', email)
|
||||
|
||||
// // Create a custom HTTP request
|
||||
// mut req := http.new_request(.post, '${j.base_url}/v1/bulk-embeddings', '')!
|
||||
// req.header = j.http.default_header // Add Authorization header
|
||||
// req.set_form_data(form) // Set multipart form data
|
||||
|
||||
// // Send the request
|
||||
// response := req.do() or {
|
||||
// return error('Failed to send bulk embedding request: ${err}')
|
||||
// }
|
||||
|
||||
// // Check for errors
|
||||
// if response.status_code != 200 {
|
||||
// return error('Bulk embedding request failed with status ${response.status_code}: ${response.body}')
|
||||
// }
|
||||
|
||||
// // Parse the JSON response
|
||||
// return json.decode(BulkEmbeddingJobResponse, response.body)!
|
||||
// }
|
||||
|
||||
// // Create embeddings with a TextDoc input
|
||||
// pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {
|
||||
|
||||
|
||||
19
lib/clients/jina/jina_client_test.v
Normal file
19
lib/clients/jina/jina_client_test.v
Normal file
@@ -0,0 +1,19 @@
|
||||
module jina
|
||||
|
||||
// setup_client returns the Jina client obtained from get(), propagating any error from get().
fn setup_client() !&Jina {
|
||||
mut client := get()!
|
||||
return client
|
||||
}
|
||||
|
||||
// test_create_embeddings requests embeddings for two short inputs with the
// jina_embeddings_v3 model and the 'separation' task, panics if the call fails,
// then asserts that the response has at least one data entry, that its object
// field is 'list', and that its model field is 'jina-embeddings-v3'.
// NOTE(review): presumably this needs a configured client and a reachable Jina API — confirm.
fn test_create_embeddings() {
|
||||
mut client := setup_client()!
|
||||
embeddings := client.create_embeddings(
|
||||
input: ['Hello', 'World']
|
||||
model: .jina_embeddings_v3
|
||||
task: 'separation'
|
||||
) or { panic('Error while creating embeddings: ${err}') }
|
||||
|
||||
assert embeddings.data.len > 0
|
||||
assert embeddings.object == 'list' // Check the object type
|
||||
assert embeddings.model == 'jina-embeddings-v3'
|
||||
}
|
||||
@@ -2,44 +2,44 @@ module jina
|
||||
|
||||
import json
|
||||
|
||||
// JinaModelEnumerator represents the available models for Jina API
|
||||
pub enum JinaModelEnumerator {
|
||||
clip_v1 // jina-clip-v1, 223M, 768
|
||||
clip_v2 // jina-clip-v2, 865M, 1024
|
||||
embeddings_v2_base_en // jina-embeddings-v2-base-en, 137M, 768
|
||||
embeddings_v2_base_es // jina-embeddings-v2-base-es, 161M, 768
|
||||
embeddings_v2_base_de // jina-embeddings-v2-base-de, 161M, 768
|
||||
embeddings_v2_base_zh // jina-embeddings-v2-base-zh, 161M, 768
|
||||
embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
|
||||
embeddings_v3 // jina-embeddings-v3, 570M, 1024
|
||||
// JinaModel represents the available Jina models
|
||||
pub enum JinaModel {
|
||||
jina_clip_v1
|
||||
jina_clip_v2
|
||||
jina_embeddings_v2_base_en
|
||||
jina_embeddings_v2_base_es
|
||||
jina_embeddings_v2_base_de
|
||||
jina_embeddings_v2_base_zh
|
||||
jina_embeddings_v2_base_code
|
||||
jina_embeddings_v3
|
||||
}
|
||||
|
||||
// to_string converts JinaModelEnumerator enum to its string representation
|
||||
pub fn (m JinaModelEnumerator) to_string() string {
|
||||
// to_string converts a JinaModel enum to its string representation as expected by the Jina API
|
||||
pub fn (m JinaModel) to_string() string {
|
||||
return match m {
|
||||
.clip_v1 { 'jina-clip-v1' }
|
||||
.clip_v2 { 'jina-clip-v2' }
|
||||
.embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
|
||||
.embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
|
||||
.embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
|
||||
.embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
|
||||
.embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
|
||||
.embeddings_v3 { 'jina-embeddings-v3' }
|
||||
.jina_clip_v1 { 'jina-clip-v1' }
|
||||
.jina_clip_v2 { 'jina-clip-v2' }
|
||||
.jina_embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
|
||||
.jina_embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
|
||||
.jina_embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
|
||||
.jina_embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
|
||||
.jina_embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
|
||||
.jina_embeddings_v3 { 'jina-embeddings-v3' }
|
||||
}
|
||||
}
|
||||
|
||||
// from_string converts string to JinaModelEnumerator enum
|
||||
pub fn jina_model_from_string(s string) !JinaModelEnumerator {
|
||||
// jina_model_from_string converts a string to a JinaModel enum, returning an error if the string is invalid
|
||||
pub fn jina_model_from_string(s string) !JinaModel {
|
||||
return match s {
|
||||
'jina-clip-v1' { JinaModelEnumerator.clip_v1 }
|
||||
'jina-clip-v2' { JinaModelEnumerator.clip_v2 }
|
||||
'jina-embeddings-v2-base-en' { JinaModelEnumerator.embeddings_v2_base_en }
|
||||
'jina-embeddings-v2-base-es' { JinaModelEnumerator.embeddings_v2_base_es }
|
||||
'jina-embeddings-v2-base-de' { JinaModelEnumerator.embeddings_v2_base_de }
|
||||
'jina-embeddings-v2-base-zh' { JinaModelEnumerator.embeddings_v2_base_zh }
|
||||
'jina-embeddings-v2-base-code' { JinaModelEnumerator.embeddings_v2_base_code }
|
||||
'jina-embeddings-v3' { JinaModelEnumerator.embeddings_v3 }
|
||||
else { error('Invalid model string: ${s}') }
|
||||
'jina-clip-v1' { JinaModel.jina_clip_v1 }
|
||||
'jina-clip-v2' { JinaModel.jina_clip_v2 }
|
||||
'jina-embeddings-v2-base-en' { JinaModel.jina_embeddings_v2_base_en }
|
||||
'jina-embeddings-v2-base-es' { JinaModel.jina_embeddings_v2_base_es }
|
||||
'jina-embeddings-v2-base-de' { JinaModel.jina_embeddings_v2_base_de }
|
||||
'jina-embeddings-v2-base-zh' { JinaModel.jina_embeddings_v2_base_zh }
|
||||
'jina-embeddings-v2-base-code' { JinaModel.jina_embeddings_v2_base_code }
|
||||
'jina-embeddings-v3' { JinaModel.jina_embeddings_v3 }
|
||||
else { error('Invalid Jina model string: ${s}') }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user