diff --git a/examples/clients/jina.vsh b/examples/clients/jina.vsh
new file mode 100755
index 00000000..7da391d8
--- /dev/null
+++ b/examples/clients/jina.vsh
@@ -0,0 +1,60 @@
+#!/usr/bin/env -S v -n -w -gc none -cc tcc -d use_openssl -enable-globals run
+
+import freeflowuniverse.herolib.clients.jina
+
+mut jina_client := jina.get()!
+
+// Create embeddings for two short texts with the jina_embeddings_v3 model and the 'separation' task; any error aborts the script via panic
+embeddings := jina_client.create_embeddings(
+	input: ['Hello', 'World']
+	model: .jina_embeddings_v3
+	task: 'separation'
+) or { panic('Error while creating embeddings: ${err}') }
+
+println('Created embeddings: ${embeddings}')
+
+// Rerank three candidate documents against the query, asking for the top 2 results
+rerank_result := jina_client.rerank(
+	model: .reranker_v2_base_multilingual
+	query: 'skincare products'
+	documents: ['Product A', 'Product B', 'Product C']
+	top_n: 2
+) or { panic('Error while reranking: ${err}') }
+
+println('Rerank result: ${rerank_result}')
+
+// Train a classifier with the jina_clip_v1 model from one text example and one image example (each example sets exactly one of text/image)
+train_result := jina_client.train(
+	model: .jina_clip_v1
+	input: [
+		jina.TrainingExample{
+			text: 'Sample text'
+			label: 'positive'
+		},
+		jina.TrainingExample{
+			image: 'https://letsenhance.io/static/73136da51c245e80edc6ccfe44888a99/1015f/MainBefore.jpg'
+			label: 'negative'
+		},
+	]
+) or { panic('Error while training: ${err}') }
+
+println('Train result: ${train_result}')
+
+// Classify one text input and one image input against the labels 'cat' and 'dog' using the jina_clip_v1 model
+classify_result := jina_client.classify(
+	model: .jina_clip_v1
+	input: [
+		jina.ClassificationInput{
+			text: 'A photo of a cat'
+		},
+		jina.ClassificationInput{
+			image: 'https://letsenhance.io/static/73136da51c245e80edc6ccfe44888a99/1015f/MainBefore.jpg'
+		},
+	]
+	labels: ['cat', 'dog']
+) or { panic('Error while classifying: ${err}') }
+
+println('Classification result: ${classify_result}')
+
+classifiers := jina_client.list_classifiers() or { panic('Error fetching classifiers: ${err}') }
+println('Classifiers: ${classifiers}')
diff --git a/lib/clients/jina/classification_api.v b/lib/clients/jina/classification_api.v
new file mode 100644
index 00000000..820697d6
--- /dev/null
+++ 
b/lib/clients/jina/classification_api.v
@@ -0,0 +1,291 @@
+module jina
+
+import json
+import freeflowuniverse.herolib.core.httpconnection
+
+// ClassificationTrainAccess represents the accessibility of the classifier; train() serializes it as the string 'public' or 'private'
+pub enum ClassificationTrainAccess {
+	public // Classifier is publicly accessible
+	private // Classifier is private (default)
+}
+
+// TrainingExample represents a single training example; train() requires exactly one of text or image to be set, plus a label
+pub struct TrainingExample {
+pub mut:
+	text ?string // Optional text content
+	image ?string // Optional image URL
+	label string // Required label
+}
+
+// ClassificationTrainOutput represents the response from the training endpoint (decoded from the JSON reply of v1/train)
+pub struct ClassificationTrainOutput {
+pub mut:
+	classifier_id string // Identifier of the trained classifier
+	num_samples int // Number of samples used in training
+	usage ClassificationTrainUsage // Token usage details
+}
+
+// ClassificationTrainUsage represents token usage for the training request
+pub struct ClassificationTrainUsage {
+pub mut:
+	total_tokens int // Total tokens consumed
+}
+
+// ClassificationTrain represents the caller-facing parameters of train(); model and classifier_id are mutually exclusive
+@[params]
+pub struct ClassificationTrain {
+pub mut:
+	model ?JinaModel // Optional model identifier (e.g., jina-clip-v1)
+	classifier_id ?string // Optional existing classifier ID
+	access ?ClassificationTrainAccess = .private // Accessibility, defaults to private
+	input []TrainingExample // Array of training examples
+	num_iters ?int = 10 // Number of training iterations, defaults to 10
+}
+
+// TrainRequest represents the JSON request body for the /v1/train endpoint; optional fields are only assigned when the caller set them
+struct TrainRequest {
+mut:
+	model ?string
+	classifier_id ?string
+	access ?string
+	input []TrainingExample
+	num_iters ?int
+}
+
+// train validates params, POSTs a JSON-encoded TrainRequest to v1/train and decodes the reply into ClassificationTrainOutput; it returns an error when both model and classifier_id are set, when model is .jina_embeddings_v3, or when an example does not carry exactly one of text/image
+pub fn (mut j Jina) train(params ClassificationTrain) !ClassificationTrainOutput {
+	// Reject requests that set both model and classifier_id. NOTE(review): a request with neither is accepted here, unlike classify — confirm this is intended
+	mut model_provided := false
+	mut classifier_id_provided := false
+	if _ := params.model {
+		model_provided = true
+	}
+
+	if _ := params.classifier_id {
+		classifier_id_provided = true
+	}
+
+	if model_provided && classifier_id_provided {
+		return error('Provide either model or classifier_id, not both')
+	}
+
+	if model := params.model {
+		if model == .jina_embeddings_v3 {
+			return error('jina-embeddings-v3 is not a valid model for classification')
+		}
+	}
+
+	// Validate each training example has exactly one of text or image (both set, or neither set, is an error)
+	for example in params.input {
+		mut text_provided := false
+		mut image_provided := false
+
+		if _ := example.text {
+			text_provided = true
+		}
+
+		if _ := example.image {
+			image_provided = true
+		}
+
+		if text_provided && image_provided {
+			return error('Each training example must have either text or image, not both')
+		}
+
+		if !text_provided && !image_provided {
+			return error('Each training example must have either text or image')
+		}
+	}
+
+	// Construct the request body; input is always copied, the optional fields only when present
+	mut request := TrainRequest{
+		input: params.input
+	}
+	if v := params.model {
+		request.model = v.to_string() // Convert JinaModel enum to string
+	}
+	if v := params.classifier_id {
+		request.classifier_id = v
+	}
+	if v := params.access {
+		request.access = match v { // serialize the enum as its lowercase name
+			.public { 'public' }
+			.private { 'private' }
+		}
+	}
+	if v := params.num_iters {
+		request.num_iters = v
+	}
+
+	// Create and send the HTTP request (JSON body, POST to v1/train)
+	req := httpconnection.Request{
+		method: .post
+		prefix: 'v1/train'
+		dataformat: .json
+		data: json.encode(request)
+	}
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
+	result := json.decode(ClassificationTrainOutput, response)!
+	return result
+}
+
+// TextDoc represents a text document for classification
+pub struct TextDoc {
+pub mut:
+	text string // The text content
+}
+
+// ImageDoc represents an image document for classification
+pub struct ImageDoc {
+pub mut:
+	image string // The image URL or base64-encoded string
+}
+
+// ClassificationInput represents a single input for classification; classify() requires exactly one of text or image to be set
+pub struct ClassificationInput {
+pub mut:
+	text ?string // Optional text content
+	image ?string // Optional image content
+}
+
+// ClassificationOutput represents the response from the classify endpoint (decoded from the JSON reply of v1/classify)
+pub struct ClassificationOutput {
+pub mut:
+	data []ClassificationResult // List of classification results
+	usage ClassificationUsage // Token usage details
+}
+
+// ClassificationResult represents a single classification result
+pub struct ClassificationResult {
+pub mut:
+	index int // Index of the input
+	prediction string // Predicted label
+	score f64 // Confidence score
+	object string // Type of object (e.g., "classification")
+	predictions []LabelScore // List of label scores
+}
+
+// LabelScore represents a label and its corresponding score
+pub struct LabelScore {
+pub mut:
+	label string // Label name
+	score f64 // Confidence score
+}
+
+// ClassificationUsage represents token usage for the classification request
+pub struct ClassificationUsage {
+pub mut:
+	total_tokens int // Total tokens consumed
+}
+
+// ClassifyRequest represents the JSON request body for the /v1/classify endpoint; model/classifier_id are only assigned when the caller set them
+struct ClassifyRequest {
+mut:
+	model ?string
+	classifier_id ?string
+	input []ClassificationInput
+	labels []string
+}
+
+// ClassifyParams represents the caller-facing parameters of classify(); exactly one of model or classifier_id must be set
+@[params]
+pub struct ClassifyParams {
+pub mut:
+	model ?JinaModel // Optional model identifier
+	classifier_id ?string // Optional classifier ID
+	input []ClassificationInput // Array of inputs (text or image)
+	labels []string // List of labels for classification
+}
+
+// Classify 
inputs by sending a POST request to /v1/classify
+pub fn (mut j Jina) classify(params ClassifyParams) !ClassificationOutput {
+	// Validate that exactly one of model or classifier_id is provided (both, or neither, is an error)
+	mut model_provided := false
+	mut classifier_id_provided := false
+	if _ := params.model {
+		model_provided = true
+	}
+	if _ := params.classifier_id {
+		classifier_id_provided = true
+	}
+	if model_provided && classifier_id_provided {
+		return error('Provide either model or classifier_id, not both')
+	}
+	if !model_provided && !classifier_id_provided {
+		return error('Either model or classifier_id must be provided')
+	}
+
+	// Validate each input has exactly one of text or image
+	for input in params.input {
+		mut text_provided := false
+		mut image_provided := false
+		if _ := input.text {
+			text_provided = true
+		}
+		if _ := input.image {
+			image_provided = true
+		}
+		if text_provided && image_provided {
+			return error('Each input must have either text or image, not both')
+		}
+		if !text_provided && !image_provided {
+			return error('Each input must have either text or image')
+		}
+	}
+
+	// Construct the request body; input and labels are always copied, model/classifier_id only when present
+	mut request := ClassifyRequest{
+		input: params.input
+		labels: params.labels
+	}
+	if v := params.model {
+		request.model = v.to_string() // Convert JinaModel enum to string
+	}
+	if v := params.classifier_id {
+		request.classifier_id = v
+	}
+
+	// Create and send the HTTP request (JSON body, POST to v1/classify)
+	req := httpconnection.Request{
+		method: .post
+		prefix: 'v1/classify'
+		dataformat: .json
+		data: json.encode(request)
+	}
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
+	result := json.decode(ClassificationOutput, response)!
+	return result
+}
+
+// Classifier describes one entry of the list returned by list_classifiers (decoded from the v1/classifiers JSON response)
+pub struct Classifier {
+pub mut:
+	classifier_id string
+	model_name string
+	labels []string
+	access string
+	updated_number int
+	used_number int
+	created_at string
+	updated_at string
+	used_at ?string
+	metadata map[string]string
+}
+
+// list_classifiers sends a GET request to v1/classifiers and decodes the JSON response into []Classifier; HTTP and decode errors are propagated
+pub fn (mut j Jina) list_classifiers() ![]Classifier {
+	req := httpconnection.Request{
+		method: .get
+		prefix: 'v1/classifiers'
+	}
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.get(req)!
+	// Decode the raw JSON array into typed values; the raw response is deliberately not printed from library code
+	classifiers := json.decode([]Classifier, response)!
+	return classifiers
+}
diff --git a/lib/clients/jina/jina_client.v b/lib/clients/jina/jina_client.v
index c33b47bd..5ef4c919 100644
--- a/lib/clients/jina/jina_client.v
+++ b/lib/clients/jina/jina_client.v
@@ -1,185 +1,245 @@
 module jina
 
 import freeflowuniverse.herolib.core.httpconnection
-import json
 import os
+import json
+
+@[params]
+pub struct CreateEmbeddingParams {
+pub mut:
+	input []string @[required] // Input texts
+	model JinaModel @[required] // Model name
+	task string @[required] // Task type as a string; must be accepted by task_type_from_string
+	type_ ?EmbeddingType // embedding type
+	truncate ?TruncateType // truncation type
+	late_chunking ?bool // Flag to determine if late chunking is applied
+}
 
 // Create embeddings for input texts
-pub fn (mut j Jina) create_embeddings(input []string, model string, task string) !ModelEmbeddingOutput {
+pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbeddingOutput {
+	task := task_type_from_string(params.task)! // fail early on an unknown task string
+
 	mut embedding_input := TextEmbeddingInput{
-		model: model
-		input: input
-		task: task
+		input: params.input
+		model: params.model.to_string() // API model identifier string for the enum value
+		task: task
 	}
-
+
+	if v := params.type_ { // only set the embedding type when the caller provided one
+		embedding_input.type_ = v
+	}
+
+	if v := params.truncate { // only set the truncation mode when the caller provided one
+		embedding_input.truncate = v
+	}
+
+	embedding_input.late_chunking = if v := params.late_chunking { v } else { false } // forward the caller's value (an explicit false stays false); unset means false
+
 	req := httpconnection.Request{
-		method: .post
-		prefix: 'v1/embeddings'
+		method: .post
+		prefix: 'v1/embeddings'
 		dataformat: .json
-		data: embedding_input.to_json()
+		data: embedding_input.to_json()
 	}
-
-	response := j.http.get(req)!
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
 	return parse_model_embedding_output(response)!
 }
 
-// Create embeddings with a TextDoc input
-pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput {
-
-	req := httpconnection.Request{
-		method: .post
-		prefix: 'v1/embeddings'
-		dataformat: .json
-		data: json.encode(args)
-	}
-
-	response := j.http.get(req)!
-	return parse_model_embedding_output(response)!
+@[params]
+pub struct RerankParams {
+pub mut:
+	model JinaRerankModel @[required] // Reranker model; sent as model.to_string()
+	query string @[required] // Query the documents are ranked against
+	documents []string @[required] // Candidate documents to rank
+	top_n ?int // Optional: Number of top results to return
+	return_documents ?bool // Optional: Flag to determine if the documents should be returned
 }
 
 // Rerank documents based on a query
-pub fn (mut j Jina) rerank(query string, documents []string, model string, top_n int) !RankingOutput {
-	mut rank_input := RankAPIInput{
-		model: model
-		query: query
-		documents: documents
-		top_n: top_n
+pub fn (mut j Jina) rerank(params RerankParams) !RankingOutput {
+	mut rank_input := RerankInput{
+		model: params.model.to_string()
+		query: params.query
+		documents: params.documents
 	}
-
+
+	if v := params.top_n { // optional fields are copied only when the caller set them
+		rank_input.top_n = v
+	}
+
+	if v := params.return_documents {
+		rank_input.return_documents = v
+	}
+
 	req := httpconnection.Request{
-		method: .post
-		prefix: 'v1/rerank'
+		method: .post
+		prefix: 'v1/rerank'
 		dataformat: .json
-		data: rank_input.to_json()
+		data: json.encode(rank_input)
 	}
-
-	response := j.http.get(req)!
-	return parse_ranking_output(response)!
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
+	return json.decode(RankingOutput, response)! // decode errors are propagated to the caller
 }
 
-// Simplified rerank function with default top_n
-pub fn (mut j Jina) rerank_simple(query string, documents []string, model string) !RankingOutput {
-	return j.rerank(query, documents, model, 0)!
-} +// // Create embeddings with a TextDoc input +// pub fn (mut j Jina) create_embeddings_with_docs(args TextEmbeddingInput) !ModelEmbeddingOutput { -// Classify input texts -pub fn (mut j Jina) classify(input []string, model string, labels []string) !ClassificationOutput { - mut classification_input := ClassificationAPIInput{ - model: model - input: input - labels: labels - } - - req := httpconnection.Request{ - method: .post - prefix: 'v1/classify' - dataformat: .json - data: classification_input.to_json() - } - - response := j.http.get(req)! - return parse_classification_output(response)! -} +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/embeddings' +// dataformat: .json +// data: json.encode(args) +// } -// Train a classifier -pub fn (mut j Jina) train(examples []TrainingExample, model string, access string) !TrainingOutput { - mut training_input := TrainingAPIInput{ - model: model - input: examples - access: access - } - - req := httpconnection.Request{ - method: .post - prefix: 'v1/train' - dataformat: .json - data: training_input.to_json() - } - - response := j.http.get(req)! - return parse_training_output(response)! -} +// response := j.http.get(req)! +// return parse_model_embedding_output(response)! +// } -// List classifiers -pub fn (mut j Jina) list_classifiers() !string { - req := httpconnection.Request{ - method: .get - prefix: 'v1/classifiers' - } - - return j.http.get(req)! -} +// // Rerank documents based on a query +// pub fn (mut j Jina) rerank(query string, documents []string, model string, top_n int) !RankingOutput { +// mut rank_input := RankAPIInput{ +// model: model +// query: query +// documents: documents +// top_n: top_n +// } -// Delete a classifier -pub fn (mut j Jina) delete_classifier(classifier_id string) !bool { - req := httpconnection.Request{ - method: .delete - prefix: 'v1/classifiers/${classifier_id}' - } - - j.http.get(req)! 
- return true -} +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/rerank' +// dataformat: .json +// data: rank_input.to_json() +// } -// Create multi-vector embeddings -pub fn (mut j Jina) create_multi_vector(input []string, model string) !ColbertModelEmbeddingsOutput { - mut data := map[string]json.Any{} - data['model'] = model - data['input'] = input - - req := httpconnection.Request{ - method: .post - prefix: 'v1/multi-embeddings' - dataformat: .json - data: json.encode(data) - } - - response := j.http.get(req)! - return parse_colbert_model_embeddings_output(response)! -} +// response := j.http.get(req)! +// return parse_ranking_output(response)! +// } -// Start a bulk embedding job -pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse { - // This endpoint requires multipart/form-data which is not directly supported by the current HTTPConnection - // We need to implement a custom solution for this - return error('Bulk embedding is not implemented yet') -} +// // Simplified rerank function with default top_n +// pub fn (mut j Jina) rerank_simple(query string, documents []string, model string) !RankingOutput { +// return j.rerank(query, documents, model, 0)! +// } -// Check the status of a bulk embedding job -pub fn (mut j Jina) check_bulk_embedding_status(job_id string) !BulkEmbeddingJobResponse { - req := httpconnection.Request{ - method: .get - prefix: 'v1/bulk-embeddings/${job_id}' - } - - response := j.http.get(req)! - return parse_bulk_embedding_job_response(response)! 
-} +// // Classify input texts +// pub fn (mut j Jina) classify(input []string, model string, labels []string) !ClassificationOutput { +// mut classification_input := ClassificationAPIInput{ +// model: model +// input: input +// labels: labels +// } -// Download the result of a bulk embedding job -pub fn (mut j Jina) download_bulk_embedding_result(job_id string) !DownloadResultResponse { - req := httpconnection.Request{ - method: .post - prefix: 'v1/bulk-embeddings/${job_id}/download-result' - } - - response := j.http.get(req)! - return parse_download_result_response(response)! -} +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/classify' +// dataformat: .json +// data: classification_input.to_json() +// } -// Check if the API key is valid by making a simple request -pub fn (mut j Jina) check_auth() !bool { - req := httpconnection.Request{ - method: .get - prefix: '/' - } - - j.http.get(req) or { - return error('Failed to connect to Jina API: ${err}') - } - - // If we get a response, the API key is valid - return true -} +// response := j.http.get(req)! +// return parse_classification_output(response)! +// } +// // Train a classifier +// pub fn (mut j Jina) train(examples []TrainingExample, model string, access string) !TrainingOutput { +// mut training_input := TrainingAPIInput{ +// model: model +// input: examples +// access: access +// } +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/train' +// dataformat: .json +// data: training_input.to_json() +// } + +// response := j.http.get(req)! +// return parse_training_output(response)! +// } + +// // List classifiers +// pub fn (mut j Jina) list_classifiers() !string { +// req := httpconnection.Request{ +// method: .get +// prefix: 'v1/classifiers' +// } + +// return j.http.get(req)! 
+// } + +// // Delete a classifier +// pub fn (mut j Jina) delete_classifier(classifier_id string) !bool { +// req := httpconnection.Request{ +// method: .delete +// prefix: 'v1/classifiers/${classifier_id}' +// } + +// j.http.get(req)! +// return true +// } + +// // Create multi-vector embeddings +// pub fn (mut j Jina) create_multi_vector(input []string, model string) !ColbertModelEmbeddingsOutput { +// mut data := map[string]json.Any{} +// data['model'] = model +// data['input'] = input + +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/multi-embeddings' +// dataformat: .json +// data: json.encode(data) +// } + +// response := j.http.get(req)! +// return parse_colbert_model_embeddings_output(response)! +// } + +// // Start a bulk embedding job +// pub fn (mut j Jina) start_bulk_embedding(file_path string, model string, email string) !BulkEmbeddingJobResponse { +// // This endpoint requires multipart/form-data which is not directly supported by the current HTTPConnection +// // We need to implement a custom solution for this +// return error('Bulk embedding is not implemented yet') +// } + +// // Check the status of a bulk embedding job +// pub fn (mut j Jina) check_bulk_embedding_status(job_id string) !BulkEmbeddingJobResponse { +// req := httpconnection.Request{ +// method: .get +// prefix: 'v1/bulk-embeddings/${job_id}' +// } + +// response := j.http.get(req)! +// return parse_bulk_embedding_job_response(response)! +// } + +// // Download the result of a bulk embedding job +// pub fn (mut j Jina) download_bulk_embedding_result(job_id string) !DownloadResultResponse { +// req := httpconnection.Request{ +// method: .post +// prefix: 'v1/bulk-embeddings/${job_id}/download-result' +// } + +// response := j.http.get(req)! +// return parse_download_result_response(response)! 
+// }
+
+// // Check if the API key is valid by making a simple request
+// pub fn (mut j Jina) check_auth() !bool {
+// 	req := httpconnection.Request{
+// 		method: .get
+// 		prefix: '/'
+// 	}
+
+// 	j.http.get(req) or {
+// 		return error('Failed to connect to Jina API: ${err}')
+// 	}
+
+// 	// If we get a response, the API key is valid
+// 	return true
+// }
diff --git a/lib/clients/jina/jina_client_test.v b/lib/clients/jina/jina_client_test.v
new file mode 100644
index 00000000..1b453f02
--- /dev/null
+++ b/lib/clients/jina/jina_client_test.v
@@ -0,0 +1,87 @@
+module jina
+
+import time
+
+fn setup_client() !&Jina { // shared helper: returns the client produced by get()
+	mut client := get()!
+	return client
+}
+
+fn test_create_embeddings() {
+	time.sleep(1 * time.second) // pause 1s before calling the API — presumably to stay under a rate limit; TODO confirm
+	mut client := setup_client()!
+	embeddings := client.create_embeddings(
+		input: ['Hello', 'World']
+		model: .jina_embeddings_v3
+		task: 'separation'
+	) or { panic('Error while creating embeddings: ${err}') }
+
+	assert embeddings.data.len > 0
+	assert embeddings.object == 'list' // Check the object type
+	assert embeddings.model == 'jina-embeddings-v3' // the reply is expected to echo the requested model identifier
+}
+
+fn test_rerank() {
+	time.sleep(1 * time.second)
+	mut client := setup_client()!
+	rerank_result := client.rerank(
+		model: .reranker_v2_base_multilingual
+		query: 'skincare products'
+		documents: ['Product A', 'Product B', 'Product C']
+		top_n: 2
+	) or { panic('Error while reranking: ${err}') }
+
+	assert rerank_result.results.len == 2 // top_n: 2 was requested for 3 documents
+	assert rerank_result.model == 'jina-reranker-v2-base-multilingual'
+}
+
+fn test_train() {
+	time.sleep(1 * time.second)
+	mut client := setup_client()!
+	train_result := client.train(
+		model: .jina_clip_v1
+		input: [
+			TrainingExample{
+				text: 'A photo of a cat'
+				label: 'cat'
+			},
+			TrainingExample{
+				text: 'A photo of a dog'
+				label: 'dog'
+			},
+		]
+	) or { panic('Error while training: ${err}') }
+
+	assert train_result.classifier_id.len > 0
+	assert train_result.num_samples == 2 // two training examples were sent
+}
+
+fn test_classify() {
+	time.sleep(1 * time.second)
+	mut client := setup_client()!
+	classify_result := client.classify(
+		model: .jina_clip_v1
+		input: [
+			ClassificationInput{
+				text: 'A photo of a cat'
+			},
+			ClassificationInput{
+				image: 'https://letsenhance.io/static/73136da51c245e80edc6ccfe44888a99/1015f/MainBefore.jpg'
+			},
+		]
+		labels: ['cat', 'dog']
+	) or { panic('Error while classifying: ${err}') }
+
+	assert classify_result.data.len == 2 // one result per input
+	assert classify_result.data[0].prediction in ['cat', 'dog']
+	assert classify_result.data[1].prediction in ['cat', 'dog']
+	assert classify_result.data[0].object == 'classification'
+	assert classify_result.data[1].object == 'classification'
+}
+
+fn test_get_classifiers() {
+	time.sleep(1 * time.second)
+	mut client := setup_client()!
+	classifiers := client.list_classifiers() or { panic('Error fetching classifiers: ${err}') }
+	assert classifiers.len != 0 // NOTE(review): assumes the account already holds at least one classifier (e.g. one created by test_train) — confirm
+}
diff --git a/lib/clients/jina/jina_model.v b/lib/clients/jina/jina_model.v
index c3df3f33..8057e768 100644
--- a/lib/clients/jina/jina_model.v
+++ b/lib/clients/jina/jina_model.v
@@ -1,6 +1,5 @@
 module jina
 
-import freeflowuniverse.herolib.data.paramsparser
 import freeflowuniverse.herolib.data.encoderhero
 import freeflowuniverse.herolib.core.httpconnection
 import net.http
@@ -17,16 +16,29 @@ const env_key = 'JINAKEY'
 @[heap]
 pub struct Jina {
 pub mut:
-	name string = 'default'
-	secret string
-	base_url string = api_base_url
-	http httpconnection.HTTPConnection @[str: skip]
+	name string = 'default'
+	secret string
+	base_url string = api_base_url
+	// http httpconnection.HTTPConnection @[str: skip] (field disabled: the connection is now obtained through httpclient() on each call)
+}
+
+fn (mut self Jina) httpclient() !&httpconnection.HTTPConnection { // returns a connection named 'Jina_vclient' pointed at self.base_url
+	mut http_conn := httpconnection.new(
+		name: 'Jina_vclient'
+		url: self.base_url
+	)!
+
+	// Add a 'Bearer <secret>' authorization header when a secret is set. NOTE(review): this runs on every call — if httpconnection.new reuses a connection by name the header may be added repeatedly; confirm
+	if self.secret.len > 0 {
+		http_conn.default_header.add(.authorization, 'Bearer ${self.secret}')
+	}
+	return http_conn
 }
 
 // your checking & initialization code if needed
 fn obj_init(mycfg_ Jina) !Jina {
 	mut mycfg := mycfg_
-
+
 	// Get API key from environment variable if not set
 	if mycfg.secret == '' {
 		if env_key in os.environ() {
@@ -35,16 +47,7 @@ fn obj_init(mycfg_ Jina) !Jina {
 			return error('Jina API key not provided and ${env_key} environment variable not set')
 		}
 	}
-
-	// Initialize HTTP connection
-	mut header := http.new_header()
-	header.add_custom('Authorization', 'Bearer ${mycfg.secret}')
-
-	mycfg.http = httpconnection.HTTPConnection{
-		base_url: mycfg.base_url
-		default_header: header
-	}
-
+
 	return mycfg
 }
 
diff --git a/lib/clients/jina/model_embed.v b/lib/clients/jina/model_embed.v
index 69770e95..f7b564e4 100644
--- a/lib/clients/jina/model_embed.v
+++ b/lib/clients/jina/model_embed.v
@@ -2,53 +2,53 @@ module jina
 import json
 
-// JinaModelEnumerator represents the available models for Jina API
-pub enum JinaModelEnumerator {
-	clip_v1 // jina-clip-v1, 223M, 768
-	clip_v2 // jina-clip-v2, 865M, 1024
-	embeddings_v2_base_en // jina-embeddings-v2-base-en, 137M, 768
-	embeddings_v2_base_es // jina-embeddings-v2-base-es, 161M, 768
-	embeddings_v2_base_de // jina-embeddings-v2-base-de, 161M, 768
-	embeddings_v2_base_zh // jina-embeddings-v2-base-zh, 161M, 768
-	embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
-	embeddings_v3 // jina-embeddings-v3, 570M, 1024
+// JinaModel represents the available Jina models; each member maps to the API identifier returned by to_string
+pub enum JinaModel {
+	jina_clip_v1 // jina-clip-v1, 223M, 768
+	jina_clip_v2 // jina-clip-v2, 865M, 1024
+	jina_embeddings_v2_base_en // jina-embeddings-v2-base-en, 137M, 768
+	jina_embeddings_v2_base_es // jina-embeddings-v2-base-es, 161M, 768
+	jina_embeddings_v2_base_de // jina-embeddings-v2-base-de, 161M, 768
+	jina_embeddings_v2_base_zh // jina-embeddings-v2-base-zh, 161M, 768
+	jina_embeddings_v2_base_code // jina-embeddings-v2-base-code, 137M, 768
+	jina_embeddings_v3 // jina-embeddings-v3, 570M, 1024
 }
 
-// to_string converts JinaModelEnumerator enum to its string representation
-pub fn (m JinaModelEnumerator) to_string() string {
+// to_string converts a JinaModel enum to its string representation as expected by the Jina API
+pub fn (m JinaModel) to_string() string {
 	return match m {
-		.clip_v1 { 'jina-clip-v1' }
-		.clip_v2 { 'jina-clip-v2' }
-		.embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
-		.embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
-		.embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
-		.embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
-		.embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
-		.embeddings_v3 { 'jina-embeddings-v3' }
+		.jina_clip_v1 { 'jina-clip-v1' }
+		.jina_clip_v2 { 'jina-clip-v2' }
+		.jina_embeddings_v2_base_en { 'jina-embeddings-v2-base-en' }
+		.jina_embeddings_v2_base_es { 'jina-embeddings-v2-base-es' }
+		.jina_embeddings_v2_base_de { 'jina-embeddings-v2-base-de' }
+		.jina_embeddings_v2_base_zh { 'jina-embeddings-v2-base-zh' }
+		.jina_embeddings_v2_base_code { 'jina-embeddings-v2-base-code' }
+		.jina_embeddings_v3 { 'jina-embeddings-v3' }
 	}
 }
 
-// from_string converts string to JinaModelEnumerator enum
-pub fn jina_model_from_string(s string) ?JinaModelEnumerator {
+// jina_model_from_string converts an API model identifier (the inverse of JinaModel.to_string) to a JinaModel enum, returning an error if the string is invalid
+pub fn jina_model_from_string(s string) !JinaModel {
 	return match s {
-		'jina-clip-v1' { JinaModelEnumerator.clip_v1 }
-		'jina-clip-v2' { JinaModelEnumerator.clip_v2 }
-		'jina-embeddings-v2-base-en' { JinaModelEnumerator.embeddings_v2_base_en }
-		'jina-embeddings-v2-base-es' { JinaModelEnumerator.embeddings_v2_base_es }
-		'jina-embeddings-v2-base-de' { JinaModelEnumerator.embeddings_v2_base_de }
-		'jina-embeddings-v2-base-zh' { JinaModelEnumerator.embeddings_v2_base_zh }
-		'jina-embeddings-v2-base-code' { JinaModelEnumerator.embeddings_v2_base_code }
-		'jina-embeddings-v3' { JinaModelEnumerator.embeddings_v3 }
-		else { error('Invalid model string: $s') }
+		'jina-clip-v1' { JinaModel.jina_clip_v1 }
+		'jina-clip-v2' { JinaModel.jina_clip_v2 }
+		'jina-embeddings-v2-base-en' { JinaModel.jina_embeddings_v2_base_en }
+		'jina-embeddings-v2-base-es' { JinaModel.jina_embeddings_v2_base_es }
+		'jina-embeddings-v2-base-de' { JinaModel.jina_embeddings_v2_base_de }
+		'jina-embeddings-v2-base-zh' { JinaModel.jina_embeddings_v2_base_zh }
+		'jina-embeddings-v2-base-code' { JinaModel.jina_embeddings_v2_base_code }
+		'jina-embeddings-v3' { JinaModel.jina_embeddings_v3 }
+		else { error('Invalid Jina model string: ${s}') }
 	}
 }
 
 // EmbeddingType represents the available embedding types
 pub enum EmbeddingType {
-	float // "float"
-	base64 // "base64"
-	binary // "binary"
-	ubinary // "ubinary"
+	float   // "float"
+	base64  // "base64"
+	binary  // "binary"
+	ubinary // "ubinary"
 }
 
 // to_string converts EmbeddingType enum to its string representation
@@ -68,17 +68,17 @@ pub fn embedding_type_from_string(s string) !EmbeddingType {
 		'base64' { EmbeddingType.base64 }
 		'binary' { EmbeddingType.binary }
 		'ubinary' { EmbeddingType.ubinary }
-		else { error('Invalid embedding type string: $s') }
+		else { error('Invalid embedding type string: ${s}') }
 	}
 }
 
 // TaskType represents the available task types for embeddings
 pub enum TaskType {
-	retrieval_query // "retrieval.query"
-	retrieval_passage // "retrieval.passage"
-	text_matching // "text-matching"
-	classification // "classification"
-	separation // "separation"
+	retrieval_query   // "retrieval.query"
+	retrieval_passage // "retrieval.passage"
+	text_matching     // "text-matching"
+	classification    // "classification"
+	separation        // "separation"
 }
 
 // to_string converts TaskType enum to its string representation
@@ -100,13 +100,13 @@ pub fn task_type_from_string(s string) !TaskType {
 		'text-matching' { TaskType.text_matching }
 		'classification' { TaskType.classification }
 		'separation' { TaskType.separation }
-		else { error('Invalid task type string: $s') }
+		else { error('Invalid task type string: ${s}') }
 	}
 }
 
 // TruncateType represents the available truncation options
 pub enum TruncateType {
-	none // "NONE"
+	none_ // "NONE" (member is spelled with a trailing underscore; the API string is unchanged)
 	start // "START"
 	end // "END"
 }
@@ -114,7 +114,7 @@ pub enum TruncateType {
 // to_string converts TruncateType enum to its string representation
 pub fn (t TruncateType) to_string() string {
 	return match t {
-		.none { 'NONE' }
+		.none_ { 'NONE' }
 		.start { 'START' }
 		.end { 'END' }
 	}
@@ -123,90 +123,89 @@ pub fn (t TruncateType) to_string() string {
 // from_string converts string to TruncateType enum
 pub fn truncate_type_from_string(s string) !TruncateType {
 	return match s {
-		'NONE' { TruncateType.none }
+		'NONE' { TruncateType.none_ }
 		'START' { TruncateType.start }
 		'END' { TruncateType.end }
-		else { error('Invalid truncate type string: $s') }
+		else { error('Invalid truncate type string: ${s}') }
 	}
 }
 
 // TextEmbeddingInputRaw represents the raw input for text embedding requests as sent to the server
 struct TextEmbeddingInputRaw {
 mut:
-	model string = 'jina-embeddings-v2-base-en'
-	input []string @[required]
-	task string // Optional: task type as string
-	type_ string @[json: 'type'] // Optional: 
embedding type as string
-	truncate string // Optional: "NONE", "START", "END"
-	late_chunking bool // Optional: Flag to determine if late chunking is applied
+	model string = 'jina-embeddings-v2-base-en'
+	input []string @[required]
+	task string // Optional: task type as string
+	type_ string @[json: 'type'] // Optional: embedding type as string
+	truncate string // Optional: "NONE", "START", "END"
+	late_chunking bool // Optional: Flag to determine if late chunking is applied
 }
 
 // TextEmbeddingInput represents the input for text embedding requests with enum types
 pub struct TextEmbeddingInput {
 pub mut:
-	model JinaModelEnumerator = JinaModelEnumerator.embeddings_v2_base_en
-	input []string @[required]
-	task TaskType // task type
-	type_ EmbeddingType // embedding type
-	truncate TruncateType // truncation type
-	late_chunking bool //Flag to determine if late chunking is applied
+	model string = 'jina-embeddings-v2-base-en' // API model identifier string
+	input []string @[required]
+	task TaskType // task type
+	type_ ?EmbeddingType // embedding type; when unset, dumps() leaves the raw string empty
+	truncate ?TruncateType // truncation type; when unset, dumps() leaves the raw string empty
+	late_chunking ?bool // Flag to determine if late chunking is applied; when unset, dumps() sends false
 }
 
 // dumps converts TextEmbeddingInput to JSON string
 pub fn (t TextEmbeddingInput) dumps() !string {
 	mut raw := TextEmbeddingInputRaw{
-		model: t.model.to_string()
-		input: t.input
-		late_chunking: t.late_chunking
+		model: t.model
+		input: t.input
+		late_chunking: if v := t.late_chunking { v } else { false } // use the stored value (an explicit false stays false); unset maps to false
 	}
-
+
 	raw.task = t.task.to_string()
-	raw.type_ = t.type_.to_string()
-	raw.truncate = t.truncate.to_string()
+	if v := t.type_ { // convert the enum to its API string only when it was set
+		raw.type_ = v.to_string()
+	}
+
+	if v := t.truncate {
+		raw.truncate = v.to_string()
+	}
+
 	return json.encode(raw)
 }
 
 // from_raw converts TextEmbeddingInputRaw to TextEmbeddingInput
-pub fn loads_text_embedding_input(text string ) !TextEmbeddingInput {
-	// TODO: go from text to InputObject over json
-	mut input := TextEmbeddingInput{
-		model: jina_model_from_string(raw.model)?
-		input: raw.input
-		late_chunking: raw.late_chunking
-	}
-
-	if raw.task != '' {
-		input.task = task_type_from_string(raw.task)!
-	}
-
-	if raw.type_ != '' {
-		input.type_ = embedding_type_from_string(raw.type_)!
-	}
-
-	if raw.truncate != '' {
-		input.truncate = truncate_type_from_string(raw.truncate)!
-	}
-
-	return input
-}
+// pub fn loads_text_embedding_input(text string) !TextEmbeddingInput {
+// 	// TODO: go from text to InputObject over json
+// 	// mut input := TextEmbeddingInput{
+// 	// 	model: jina_model_from_string(raw.model)?
+// 	// 	input: raw.input
+// 	// 	late_chunking: raw.late_chunking
+// 	// }
+
+// 	// if raw.task != '' {
+// 	// 	input.task = task_type_from_string(raw.task)!
+// 	// }
+
+// 	// if raw.type_ != '' {
+// 	// 	input.type_ = embedding_type_from_string(raw.type_)!
+// 	// }
+
+// 	// if raw.truncate != '' {
+// 	// 	input.truncate = truncate_type_from_string(raw.truncate)!
+// 	// }
+
+// 	return TextEmbeddingInput{}
+// }
 
 // loads converts a JSON string to TextEmbeddingInput
-pub fn loads(text string) !TextEmbeddingInput {
-	// First decode the JSON string to the raw struct
-	raw := json.decode(TextEmbeddingInputRaw, text) or {
-		return error('Failed to decode JSON: $err')
-	}
-
-	// Then convert the raw struct to the typed struct
-	return text_embedding_input_from_raw(raw)
-}
+// pub fn loads(text string) !TextEmbeddingInput {
+// 	// First decode the JSON string to the raw struct
+// 	raw := json.decode(TextEmbeddingInputRaw, text) or {
+// 		return error('Failed to decode JSON: ${err}')
+// 	}
 
-// TextDoc represents a document with ID and text for embedding
-pub struct TextDoc {
-pub mut:
-	id string
-	text string
-}
+// 	// Then convert the raw struct to the typed struct
+// 	return text_embedding_input_from_raw(raw)
+// }
 
 // ModelEmbeddingOutput represents the response from embedding requests
 pub struct ModelEmbeddingOutput {
diff --git a/lib/clients/jina/model_rank.v b/lib/clients/jina/model_rank.v
index 3f7d1f1f..0dc0e43a 100644
--- 
a/lib/clients/jina/model_rank.v +++ b/lib/clients/jina/model_rank.v @@ -1,91 +1,6 @@ module jina -// RankAPIInput represents the input for reranking requests -// model: -// jina-reranker-v2-base-multilingual, 278M -// jina-reranker-v1-base-en, 137M -// jina-reranker-v1-tiny-en, 33M -// jina-reranker-v1-turbo-en, 38M -// jina-colbert-v1-en, 137M -pub struct RankAPIInputRAW { -pub mut: - model string @[required] - query string @[required] - documents []string @[required] - top_n int // Optional: Number of top results to return -} - -// RankingOutput represents the response from reranking requests -pub struct RankingOutput { -pub mut: - model string - results []RankResult - usage Usage - object string -} - -// RankResult represents a single reranking result -pub struct RankResult { -pub mut: - document string - index int - relevance_score f64 -} - -// ClassificationAPIInput represents the input for classification requests -pub struct ClassificationAPIInput { -pub mut: - model string @[required] - input []string @[required] - labels []string @[required] -} - -// ClassificationOutput represents the response from classification requests -pub struct ClassificationOutput { -pub mut: - model string - data []ClassificationData - usage Usage - object string -} - -// ClassificationData represents a single classification result -pub struct ClassificationData { -pub mut: - classifications []Classification - index int -} - -// Classification represents a single label classification with score -pub struct Classification { -pub mut: - label string - score f64 -} - -// TrainingExample represents a single training example for classifier training -pub struct TrainingExample { -pub mut: - text string - label string -} - -// TrainingAPIInput represents the input for training a classifier -pub struct TrainingAPIInput { -pub mut: - model string @[required] - input []TrainingExample @[required] - access string // Optional: "public" or "private" -} - -// TrainingOutput represents the 
response from training a classifier -pub struct TrainingOutput { -pub mut: - classifier_id string - model string - status string - object string -} +import json // BulkEmbeddingJobResponse represents the response from bulk embedding operations pub struct BulkEmbeddingJobResponse { @@ -136,9 +51,9 @@ pub mut: // ValidationError represents a single validation error pub struct ValidationError { pub mut: - loc []string - msg string - type_ string @[json: 'type'] // 'type' is a keyword, so we need to specify the JSON name + loc []string + msg string + type_ string @[json: 'type'] // 'type' is a keyword, so we need to specify the JSON name } // Serialize and deserialize functions for the main request/response types @@ -158,36 +73,16 @@ pub fn parse_model_embedding_output(json_str string) !ModelEmbeddingOutput { return json.decode(ModelEmbeddingOutput, json_str) } -// Serialize RankAPIInput to JSON -pub fn (input RankAPIInput) to_json() string { - return json.encode(input) -} +// // Serialize RankAPIInput to JSON +// pub fn (input RankAPIInput) to_json() string { +// return json.encode(input) +// } // Parse JSON to RankingOutput pub fn parse_ranking_output(json_str string) !RankingOutput { return json.decode(RankingOutput, json_str) } -// Serialize ClassificationAPIInput to JSON -pub fn (input ClassificationAPIInput) to_json() string { - return json.encode(input) -} - -// Parse JSON to ClassificationOutput -pub fn parse_classification_output(json_str string) !ClassificationOutput { - return json.decode(ClassificationOutput, json_str) -} - -// Serialize TrainingAPIInput to JSON -pub fn (input TrainingAPIInput) to_json() string { - return json.encode(input) -} - -// Parse JSON to TrainingOutput -pub fn parse_training_output(json_str string) !TrainingOutput { - return json.decode(TrainingOutput, json_str) -} - // Parse JSON to BulkEmbeddingJobResponse pub fn parse_bulk_embedding_job_response(json_str string) !BulkEmbeddingJobResponse { return 
json.decode(BulkEmbeddingJobResponse, json_str) diff --git a/lib/clients/jina/rank_api.v b/lib/clients/jina/rank_api.v new file mode 100644 index 00000000..e17efd50 --- /dev/null +++ b/lib/clients/jina/rank_api.v @@ -0,0 +1,67 @@ +module jina + +import json + +pub enum JinaRerankModel { + reranker_v2_base_multilingual // 278M + reranker_v1_base_en // 137M + reranker_v1_tiny_en // 33M + reranker_v1_turbo_en // 38M + colbert_v1_en // 137M +} + +// RerankInput represents the input for reranking requests +pub struct RerankInput { +pub mut: + model string @[required] // Model name + query string @[required] // Query text + documents []string @[required] // Document texts + top_n ?int // Optional: Number of top results to return + return_documents ?bool // Optional: Flag to determine if the documents should be returned +} + +// RankingOutput represents the response from reranking requests +pub struct RankingOutput { +pub mut: + model string + results []RankResult + usage Usage + object string +} + +// RankResult represents a single reranking result +pub struct RankResult { +pub mut: + document RankDocument + index int + relevance_score f64 +} + +// RankDocument represents a single document for reranking +pub struct RankDocument { +pub mut: + text string +} + +// to_string converts a JinaRerankModel enum to its string representation as expected by the Jina API +pub fn (m JinaRerankModel) to_string() string { + return match m { + .reranker_v2_base_multilingual { 'jina-reranker-v2-base-multilingual' } + .reranker_v1_base_en { 'jina-reranker-v1-base-en' } + .reranker_v1_tiny_en { 'jina-reranker-v1-tiny-en' } + .reranker_v1_turbo_en { 'jina-reranker-v1-turbo-en' } + .colbert_v1_en { 'jina-colbert-v1-en' } + } +} + +// jina_rerank_model_from_string converts a string to a JinaRerankModel enum, returning an error if the string is invalid +pub fn jina_rerank_model_from_string(s string) !JinaRerankModel { + return match s { + 'jina-reranker-v2-base-multilingual' { 
JinaRerankModel.reranker_v2_base_multilingual } + 'jina-reranker-v1-base-en' { JinaRerankModel.reranker_v1_base_en } + 'jina-reranker-v1-tiny-en' { JinaRerankModel.reranker_v1_tiny_en } + 'jina-reranker-v1-turbo-en' { JinaRerankModel.reranker_v1_turbo_en } + 'jina-colbert-v1-en' { JinaRerankModel.colbert_v1_en } + else { error('Invalid JinaRerankModel string: ${s}') } + } +}