diff --git a/examples/clients/jina.vsh b/examples/clients/jina.vsh
index bc411e4f..26cfaae0 100755
--- a/examples/clients/jina.vsh
+++ b/examples/clients/jina.vsh
@@ -6,7 +6,7 @@ mut jina_client := jina.get()!
 
 embeddings := jina_client.create_embeddings(
 	input: ['Hello', 'World']
-	model: .embeddings_v3
+	model: .jina_embeddings_v3
 	task: 'separation'
 ) or { panic('Error while creating embeddings: ${err}') }
 
diff --git a/lib/clients/jina/jina_client.v b/lib/clients/jina/jina_client.v
index 9322cb14..28a364f0 100644
--- a/lib/clients/jina/jina_client.v
+++ b/lib/clients/jina/jina_client.v
@@ -1,13 +1,18 @@
 module jina
 
 import freeflowuniverse.herolib.core.httpconnection
+import os
+import json
 
 @[params]
 pub struct CreateEmbeddingParams {
 pub mut:
-	input []string            @[required] // Input texts
-	model JinaModelEnumerator @[required] // Model name
-	task  string              @[required] // Task type
+	input         []string       @[required] // Input texts
+	model         JinaModel      @[required] // Model name
+	task          string         @[required] // Task type
+	type_         ?EmbeddingType // Embedding type; only forwarded when set
+	truncate      ?TruncateType  // Truncation type; only forwarded when set
+	late_chunking ?bool          // Whether late chunking is applied; treated as false when unset
 }
 
 // Create embeddings for input texts
@@ -20,6 +25,16 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
 		task: task
 	}
 
+	if v := params.type_ {
+		embedding_input.type_ = v
+	}
+
+	if v := params.truncate {
+		embedding_input.truncate = v
+	}
+
+	embedding_input.late_chunking = params.late_chunking or { false } // use the caller's value (an explicit false stays false); unset means false
+
 	req := httpconnection.Request{
 		method: .post
 		prefix: 'v1/embeddings'
@@ -32,37 +47,6 @@ pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbedd
 	return parse_model_embedding_output(response)!
 }
 
-// pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse {
-// 	// Read the file content
-// 	file_content := os.read_file(file_path) or {
-// 		return error('Failed to read file: ${err}')
-// 	}
-
-// 	// Create a multipart form
-// 	mut form := http.FormData{}
-// 	form.add_field('file', file_content, 'input.csv', 'text/csv')
-// 	form.add_field('model', model)
-// 	form.add_field('email', email)
-
-// 	// Create a custom HTTP request
-// 	mut req := http.new_request(.post, '${j.base_url}/v1/bulk-embeddings', '')!
-// 	req.header = j.http.default_header // Add Authorization header
-// 	req.set_form_data(form) // Set multipart form data
-
-// 	// Send the request
-// 	response := req.do() or {
-// 		return error('Failed to send bulk embedding request: ${err}')
-// 	}
-
-// 	// Check for errors
-// 	if response.status_code != 200 {
-// 		return error('Bulk embedding request failed with status ${response.status_code}: ${response.body}')
-// 	}
-
-// 	// Parse the JSON response
-// 	return json.decode(BulkEmbeddingJobResponse, response.body)!
-// }
-
 // // Create embeddings with a TextDoc input
 // pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {
 
diff --git a/lib/clients/jina/jina_client_test.v b/lib/clients/jina/jina_client_test.v
new file mode 100644
index 00000000..786e498d
--- /dev/null
+++ b/lib/clients/jina/jina_client_test.v
@@ -0,0 +1,19 @@
+module jina
+
+fn setup_client() !&Jina {
+	mut client := get()!
+	return client
+}
+
+fn test_create_embeddings() {
+	mut client := setup_client()!
+	embeddings := client.create_embeddings(
+		input: ['Hello', 'World']
+		model: .jina_embeddings_v3
+		task: 'separation'
+	) or { panic('Error while creating embeddings: ${err}') }
+
+	assert embeddings.data.len > 0
+	assert embeddings.object == 'list' // Check the object type
+	assert embeddings.model == 'jina-embeddings-v3'
+}
diff --git a/lib/clients/jina/model_embed.v b/lib/clients/jina/model_embed.v
index bdd92a06..159296b0 100644
--- a/lib/clients/jina/model_embed.v
+++ b/lib/clients/jina/model_embed.v
@@ -2,44 +2,44 @@ module jina
 
 import json
 
-// JinaModelEnumerator represents the available models for Jina API
-pub enum JinaModelEnumerator {
-	clip_v1                 // jina-clip-v1, 223M, 768
-	clip_v2                 // jina-clip-v2, 865M, 1024
-	embeddings_v2_base_en   // jina-embeddings-v2-base-en, 137M, 768
-	embeddings_v2_base_es   // jina-embeddings-v2-base-es, 161M, 768
-	embeddings_v2_base_de   // jina-embeddings-v2-base-de, 161M, 768
-	embeddings_v2_base_zh   // jina-embeddings-v2-base-zh, 161M, 768
-	embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
-	embeddings_v3           // jina-embeddings-v3, 570M, 1024
+// JinaModel represents the available Jina models
+pub enum JinaModel {
+	jina_clip_v1                 // jina-clip-v1, 223M, 768
+	jina_clip_v2                 // jina-clip-v2, 865M, 1024
+	jina_embeddings_v2_base_en   // jina-embeddings-v2-base-en, 137M, 768
+	jina_embeddings_v2_base_es   // jina-embeddings-v2-base-es, 161M, 768
+	jina_embeddings_v2_base_de   // jina-embeddings-v2-base-de, 161M, 768
+	jina_embeddings_v2_base_zh   // jina-embeddings-v2-base-zh, 161M, 768
+	jina_embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
+	jina_embeddings_v3           // jina-embeddings-v3, 570M, 1024
 }
 
-// to_string converts JinaModelEnumerator enum to its string representation
-pub fn (m JinaModelEnumerator) to_string() string {
+// to_string converts a JinaModel enum to its string representation as expected by the Jina API
+pub fn (m JinaModel) to_string() string {
 	return match m {
-		.clip_v1 { 'jina-clip-v1' }
-		.clip_v2 { 'jina-clip-v2' }
-		.embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
-		.embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
-		.embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
-		.embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
-		.embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
-		.embeddings_v3 { 'jina-embeddings-v3' }
+		.jina_clip_v1 { 'jina-clip-v1' }
+		.jina_clip_v2 { 'jina-clip-v2' }
+		.jina_embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
+		.jina_embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
+		.jina_embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
+		.jina_embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
+		.jina_embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
+		.jina_embeddings_v3 { 'jina-embeddings-v3' }
 	}
 }
 
-// from_string converts string to JinaModelEnumerator enum
-pub fn jina_model_from_string(s string) !JinaModelEnumerator {
+// jina_model_from_string converts a string to a JinaModel enum, returning an error if the string is invalid
+pub fn jina_model_from_string(s string) !JinaModel {
 	return match s {
-		'jina-clip-v1' { JinaModelEnumerator.clip_v1 }
-		'jina-clip-v2' { JinaModelEnumerator.clip_v2 }
-		'jina-embeddings-v2-base-en' { JinaModelEnumerator.embeddings_v2_base_en }
-		'jina-embeddings-v2-base-es' { JinaModelEnumerator.embeddings_v2_base_es }
-		'jina-embeddings-v2-base-de' { JinaModelEnumerator.embeddings_v2_base_de }
-		'jina-embeddings-v2-base-zh' { JinaModelEnumerator.embeddings_v2_base_zh }
-		'jina-embeddings-v2-base-code' { JinaModelEnumerator.embeddings_v2_base_code }
-		'jina-embeddings-v3' { JinaModelEnumerator.embeddings_v3 }
-		else { error('Invalid model string: ${s}') }
+		'jina-clip-v1' { JinaModel.jina_clip_v1 }
+		'jina-clip-v2' { JinaModel.jina_clip_v2 }
+		'jina-embeddings-v2-base-en' { JinaModel.jina_embeddings_v2_base_en }
+		'jina-embeddings-v2-base-es' { JinaModel.jina_embeddings_v2_base_es }
+		'jina-embeddings-v2-base-de' { JinaModel.jina_embeddings_v2_base_de }
+		'jina-embeddings-v2-base-zh' { JinaModel.jina_embeddings_v2_base_zh }
+		'jina-embeddings-v2-base-code' { JinaModel.jina_embeddings_v2_base_code }
+		'jina-embeddings-v3' { JinaModel.jina_embeddings_v3 }
+		else { error('Invalid Jina model string: ${s}') }
 	}
 }
 