feat: Enhance Jina client with additional embedding parameters
- Add `type_`, `truncate`, and `late_chunking` parameters to the `create_embeddings` function for finer control over embedding generation. This allows users to specify embedding type, truncation method, and whether to apply late chunking.
- Rename model parameter to `model` for clarity and consistency.
- Improve model enum naming for better readability and API consistency.
- Add unit tests for the `create_embeddings` function to ensure correct functionality and handle potential errors.
This commit is contained in:
@@ -6,7 +6,7 @@ mut jina_client := jina.get()!
|
|||||||
|
|
||||||
embeddings := jina_client.create_embeddings(
|
embeddings := jina_client.create_embeddings(
|
||||||
input: ['Hello', 'World']
|
input: ['Hello', 'World']
|
||||||
model: .embeddings_v3
|
model: .jina_embeddings_v3
|
||||||
task: 'separation'
|
task: 'separation'
|
||||||
) or { panic('Error while creating embeddings: ${err}') }
|
) or { panic('Error while creating embeddings: ${err}') }
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,18 @@
|
|||||||
module jina
|
module jina
|
||||||
|
|
||||||
import freeflowuniverse.herolib.core.httpconnection
|
import freeflowuniverse.herolib.core.httpconnection
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
@[params]
|
@[params]
|
||||||
pub struct CreateEmbeddingParams {
|
pub struct CreateEmbeddingParams {
|
||||||
pub mut:
|
pub mut:
|
||||||
input []string @[required] // Input texts
|
input []string @[required] // Input texts
|
||||||
model JinaModelEnumerator @[required] // Model name
|
model JinaModel @[required] // Model name
|
||||||
task string @[required] // Task type
|
task string @[required] // Task type
|
||||||
|
type_ ?EmbeddingType // embedding type
|
||||||
|
truncate ?TruncateType // truncation type
|
||||||
|
late_chunking ?bool // Flag to determine if late chunking is applied
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create embeddings for input texts
|
// Create embeddings for input texts
|
||||||
@@ -20,6 +25,16 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
|
|||||||
task: task
|
task: task
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if v := params.type_ {
|
||||||
|
embedding_input.type_ = v
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := params.truncate {
|
||||||
|
embedding_input.truncate = v
|
||||||
|
}
|
||||||
|
|
||||||
|
embedding_input.late_chunking = if _ := params.late_chunking { true } else { false }
|
||||||
|
|
||||||
req := httpconnection.Request{
|
req := httpconnection.Request{
|
||||||
method: .post
|
method: .post
|
||||||
prefix: 'v1/embeddings'
|
prefix: 'v1/embeddings'
|
||||||
@@ -32,37 +47,6 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
|
|||||||
return parse_model_embedding_output(response)!
|
return parse_model_embedding_output(response)!
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse {
|
|
||||||
// // Read the file content
|
|
||||||
// file_content := os.read_file(file_path) or {
|
|
||||||
// return error('Failed to read file: ${err}')
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Create a multipart form
|
|
||||||
// mut form := http.FormData{}
|
|
||||||
// form.add_field('file', file_content, 'input.csv', 'text/csv')
|
|
||||||
// form.add_field('model', model)
|
|
||||||
// form.add_field('email', email)
|
|
||||||
|
|
||||||
// // Create a custom HTTP request
|
|
||||||
// mut req := http.new_request(.post, '${j.base_url}/v1/bulk-embeddings', '')!
|
|
||||||
// req.header = j.http.default_header // Add Authorization header
|
|
||||||
// req.set_form_data(form) // Set multipart form data
|
|
||||||
|
|
||||||
// // Send the request
|
|
||||||
// response := req.do() or {
|
|
||||||
// return error('Failed to send bulk embedding request: ${err}')
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Check for errors
|
|
||||||
// if response.status_code != 200 {
|
|
||||||
// return error('Bulk embedding request failed with status ${response.status_code}: ${response.body}')
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Parse the JSON response
|
|
||||||
// return json.decode(BulkEmbeddingJobResponse, response.body)!
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Create embeddings with a TextDoc input
|
// // Create embeddings with a TextDoc input
|
||||||
// pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {
|
// pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {
|
||||||
|
|
||||||
|
|||||||
19
lib/clients/jina/jina_client_test.v
Normal file
19
lib/clients/jina/jina_client_test.v
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
module jina
|
||||||
|
|
||||||
|
fn setup_client() !&Jina {
|
||||||
|
mut client := get()!
|
||||||
|
return client
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_create_embeddings() {
|
||||||
|
mut client := setup_client()!
|
||||||
|
embeddings := client.create_embeddings(
|
||||||
|
input: ['Hello', 'World']
|
||||||
|
model: .jina_embeddings_v3
|
||||||
|
task: 'separation'
|
||||||
|
) or { panic('Error while creating embeddings: ${err}') }
|
||||||
|
|
||||||
|
assert embeddings.data.len > 0
|
||||||
|
assert embeddings.object == 'list' // Check the object type
|
||||||
|
assert embeddings.model == 'jina-embeddings-v3'
|
||||||
|
}
|
||||||
@@ -2,44 +2,44 @@ module jina
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
// JinaModelEnumerator represents the available models for Jina API
|
// JinaModel represents the available Jina models
|
||||||
pub enum JinaModelEnumerator {
|
pub enum JinaModel {
|
||||||
clip_v1 // jina-clip-v1, 223M, 768
|
jina_clip_v1
|
||||||
clip_v2 // jina-clip-v2, 865M, 1024
|
jina_clip_v2
|
||||||
embeddings_v2_base_en // jina-embeddings-v2-base-en, 137M, 768
|
jina_embeddings_v2_base_en
|
||||||
embeddings_v2_base_es // jina-embeddings-v2-base-es, 161M, 768
|
jina_embeddings_v2_base_es
|
||||||
embeddings_v2_base_de // jina-embeddings-v2-base-de, 161M, 768
|
jina_embeddings_v2_base_de
|
||||||
embeddings_v2_base_zh // jina-embeddings-v2-base-zh, 161M, 768
|
jina_embeddings_v2_base_zh
|
||||||
embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
|
jina_embeddings_v2_base_code
|
||||||
embeddings_v3 // jina-embeddings-v3, 570M, 1024
|
jina_embeddings_v3
|
||||||
}
|
}
|
||||||
|
|
||||||
// to_string converts JinaModelEnumerator enum to its string representation
|
// to_string converts a JinaModel enum to its string representation as expected by the Jina API
|
||||||
pub fn (m JinaModelEnumerator) to_string() string {
|
pub fn (m JinaModel) to_string() string {
|
||||||
return match m {
|
return match m {
|
||||||
.clip_v1 { 'jina-clip-v1' }
|
.jina_clip_v1 { 'jina-clip-v1' }
|
||||||
.clip_v2 { 'jina-clip-v2' }
|
.jina_clip_v2 { 'jina-clip-v2' }
|
||||||
.embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
|
.jina_embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
|
||||||
.embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
|
.jina_embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
|
||||||
.embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
|
.jina_embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
|
||||||
.embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
|
.jina_embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
|
||||||
.embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
|
.jina_embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
|
||||||
.embeddings_v3 { 'jina-embeddings-v3' }
|
.jina_embeddings_v3 { 'jina-embeddings-v3' }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// from_string converts string to JinaModelEnumerator enum
|
// from_string converts a string to a JinaModel enum, returning an error if the string is invalid
|
||||||
pub fn jina_model_from_string(s string) !JinaModelEnumerator {
|
pub fn jina_model_from_string(s string) !JinaModel {
|
||||||
return match s {
|
return match s {
|
||||||
'jina-clip-v1' { JinaModelEnumerator.clip_v1 }
|
'jina-clip-v1' { JinaModel.jina_clip_v1 }
|
||||||
'jina-clip-v2' { JinaModelEnumerator.clip_v2 }
|
'jina-clip-v2' { JinaModel.jina_clip_v2 }
|
||||||
'jina-embeddings-v2-base-en' { JinaModelEnumerator.embeddings_v2_base_en }
|
'jina-embeddings-v2-base-en' { JinaModel.jina_embeddings_v2_base_en }
|
||||||
'jina-embeddings-v2-base-es' { JinaModelEnumerator.embeddings_v2_base_es }
|
'jina-embeddings-v2-base-es' { JinaModel.jina_embeddings_v2_base_es }
|
||||||
'jina-embeddings-v2-base-de' { JinaModelEnumerator.embeddings_v2_base_de }
|
'jina-embeddings-v2-base-de' { JinaModel.jina_embeddings_v2_base_de }
|
||||||
'jina-embeddings-v2-base-zh' { JinaModelEnumerator.embeddings_v2_base_zh }
|
'jina-embeddings-v2-base-zh' { JinaModel.jina_embeddings_v2_base_zh }
|
||||||
'jina-embeddings-v2-base-code' { JinaModelEnumerator.embeddings_v2_base_code }
|
'jina-embeddings-v2-base-code' { JinaModel.jina_embeddings_v2_base_code }
|
||||||
'jina-embeddings-v3' { JinaModelEnumerator.embeddings_v3 }
|
'jina-embeddings-v3' { JinaModel.jina_embeddings_v3 }
|
||||||
else { error('Invalid model string: ${s}') }
|
else { error('Invalid Jina model string: ${s}') }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user