diff --git a/examples/clients/jina.vsh b/examples/clients/jina.vsh index 7da391d8..ea430db1 100755 --- a/examples/clients/jina.vsh +++ b/examples/clients/jina.vsh @@ -3,6 +3,8 @@ import freeflowuniverse.herolib.clients.jina mut jina_client := jina.get()! +health := jina_client.health()! +println('Server health: ${health}') // Create embeddings embeddings := jina_client.create_embeddings( @@ -56,5 +58,29 @@ classify_result := jina_client.classify( println('Classification result: ${classify_result}') +// List classifiers classifiers := jina_client.list_classifiers() or { panic('Error fetching classifiers: ${err}') } println('Classifiers: ${classifiers}') + +// Delete classifier +delete_result := jina_client.delete_classifier(classifier_id: classifiers[0].classifier_id) or { + panic('Error deleting classifier: ${err}') +} +println('Delete result: ${delete_result}') + +// Create multi vector +multi_vector := jina_client.create_multi_vector( + input: [ + jina.MultiVectorTextDoc{ + text: 'Hello world' + input_type: .document + }, + jina.MultiVectorTextDoc{ + text: "What's up?" + input_type: .query + }, + ] + embedding_type: ['float'] + // dimensions: 96 +)! +println('Multi vector: ${multi_vector}') diff --git a/lib/clients/jina/jina_client_test.v b/lib/clients/jina/api_test.v similarity index 82% rename from lib/clients/jina/jina_client_test.v rename to lib/clients/jina/api_test.v index 1b453f02..75dfa380 100644 --- a/lib/clients/jina/jina_client_test.v +++ b/lib/clients/jina/api_test.v @@ -85,3 +85,18 @@ fn test_get_classifiers() { classifiers := client.list_classifiers() or { panic('Error fetching classifiers: ${err}') } assert classifiers.len != 0 } + +// Delete classifier +fn test_delete_classifiers() { + time.sleep(1 * time.second) + mut client := setup_client()! 
+ + classifiers := client.list_classifiers() or { panic('Error fetching classifiers: ${err}') } + assert classifiers.len != 0 + + delete_result := client.delete_classifier(classifier_id: classifiers[0].classifier_id) or { + panic('Error deleting classifier: ${err}') + } + + assert delete_result == '{"message":"Classifier ${classifiers[0].classifier_id} deleted"}' +} diff --git a/lib/clients/jina/classification_api.v b/lib/clients/jina/classification_api.v index 820697d6..b06ec13b 100644 --- a/lib/clients/jina/classification_api.v +++ b/lib/clients/jina/classification_api.v @@ -285,7 +285,25 @@ pub fn (mut j Jina) list_classifiers() ![]Classifier { mut httpclient := j.httpclient()! response := httpclient.get(req)! - println('response: ${response}') classifiers := json.decode([]Classifier, response)! return classifiers } + +// DeleteClassifierParams represents the parameters for a delete-classifier request +@[params] +pub struct DeleteClassifierParams { +pub mut: + classifier_id string @[required] // The ID of the classifier to delete +} + +// delete_classifier sends a DELETE request for v1/classifiers/<classifier_id> and returns the response string unchanged +pub fn (mut j Jina) delete_classifier(params DeleteClassifierParams) !string { + req := httpconnection.Request{ + method: .delete + prefix: 'v1/classifiers/${params.classifier_id}' + } + + mut httpclient := j.httpclient()! + response := httpclient.delete(req)! 
+ return response +} diff --git a/lib/clients/jina/model_embed.v b/lib/clients/jina/embeddings_api.v similarity index 71% rename from lib/clients/jina/model_embed.v rename to lib/clients/jina/embeddings_api.v index f7b564e4..8cf61dbe 100644 --- a/lib/clients/jina/model_embed.v +++ b/lib/clients/jina/embeddings_api.v @@ -1,5 +1,6 @@ module jina +import freeflowuniverse.herolib.core.httpconnection import json // JinaModel represents the available Jina models @@ -43,6 +44,32 @@ pub fn jina_model_from_string(s string) !JinaModel { } } +// TruncateType represents the available truncation options +pub enum TruncateType { + none_ // "NONE" + start // "START" + end // "END" +} + +// to_string converts TruncateType enum to its string representation +pub fn (t TruncateType) to_string() string { + return match t { + .none_ { 'NONE' } + .start { 'START' } + .end { 'END' } + } +} + +// from_string converts string to TruncateType enum +pub fn truncate_type_from_string(s string) !TruncateType { + return match s { + 'NONE' { TruncateType.none_ } + 'START' { TruncateType.start } + 'END' { TruncateType.end } + else { error('Invalid truncate type string: ${s}') } + } +} + // EmbeddingType represents the available embedding types pub enum EmbeddingType { float // "float" @@ -81,17 +108,6 @@ pub enum TaskType { separation // "separation" } -// to_string converts TaskType enum to its string representation -pub fn (t TaskType) to_string() string { - return match t { - .retrieval_query { 'retrieval.query' } - .retrieval_passage { 'retrieval.passage' } - .text_matching { 'text-matching' } - .classification { 'classification' } - .separation { 'separation' } - } -} - // from_string converts string to TaskType enum pub fn task_type_from_string(s string) !TaskType { return match s { @@ -104,41 +120,22 @@ pub fn task_type_from_string(s string) !TaskType { } } -// TruncateType represents the available truncation options -pub enum TruncateType { - none_ // "NONE" - start // "START" - end // 
"END" -} - -// to_string converts TruncateType enum to its string representation -pub fn (t TruncateType) to_string() string { +// to_string converts TaskType enum to its string representation +pub fn (t TaskType) to_string() string { return match t { - .none_ { 'NONE' } - .start { 'START' } - .end { 'END' } + .retrieval_query { 'retrieval.query' } + .retrieval_passage { 'retrieval.passage' } + .text_matching { 'text-matching' } + .classification { 'classification' } + .separation { 'separation' } } } -// from_string converts string to TruncateType enum -pub fn truncate_type_from_string(s string) !TruncateType { - return match s { - 'NONE' { TruncateType.none_ } - 'START' { TruncateType.start } - 'END' { TruncateType.end } - else { error('Invalid truncate type string: ${s}') } - } -} - -// TextEmbeddingInputRaw represents the raw input for text embedding requests as sent to the server -struct TextEmbeddingInputRaw { -mut: - model string = 'jina-embeddings-v2-base-en' - input []string @[required] - task string // Optional: task type as string - type_ string @[json: 'type'] // Optional: embedding type as string - truncate string // Optional: "NONE", "START", "END" - late_chunking bool // Optional: Flag to determine if late chunking is applied +// Usage represents token usage information +pub struct Usage { +pub mut: + total_tokens int + unit string } // TextEmbeddingInput represents the input for text embedding requests with enum types @@ -152,61 +149,14 @@ pub mut: late_chunking ?bool // Flag to determine if late chunking is applied } -// dumps converts TextEmbeddingInput to JSON string -pub fn (t TextEmbeddingInput) dumps() !string { - mut raw := TextEmbeddingInputRaw{ - model: t.model - input: t.input - late_chunking: if v := t.late_chunking { true } else { false } - } - - raw.task = t.task.to_string() - if v := t.type_ { - raw.type_ = v.to_string() - } - - if v := t.truncate { - raw.truncate = v.to_string() - } - - return json.encode(raw) +// EmbeddingData 
represents a single embedding result +pub struct EmbeddingData { +pub mut: + embedding []f64 + index int + object string } -// from_raw converts TextEmbeddingInputRaw to TextEmbeddingInput -// pub fn loads_text_embedding_input(text string) !TextEmbeddingInput { -// // TODO: go from text to InputObject over json -// // mut input := TextEmbeddingInput{ -// // model: jina_model_from_string(raw.model)? -// // input: raw.input -// // late_chunking: raw.late_chunking -// // } - -// // if raw.task != '' { -// // input.task = task_type_from_string(raw.task)! -// // } - -// // if raw.type_ != '' { -// // input.type_ = embedding_type_from_string(raw.type_)! -// // } - -// // if raw.truncate != '' { -// // input.truncate = truncate_type_from_string(raw.truncate)! -// // } - -// return TextEmbeddingInput{} -// } - -// loads converts a JSON string to TextEmbeddingInput -// pub fn loads(text string) !TextEmbeddingInput { -// // First decode the JSON string to the raw struct -// raw := json.decode(TextEmbeddingInputRaw, text) or { -// return error('Failed to decode JSON: ${err}') -// } - -// // Then convert the raw struct to the typed struct -// return text_embedding_input_from_raw(raw) -// } - // ModelEmbeddingOutput represents the response from embedding requests pub struct ModelEmbeddingOutput { pub mut: @@ -217,17 +167,74 @@ pub mut: dimension int } -// EmbeddingData represents a single embedding result -pub struct EmbeddingData { +// CreateEmbeddingParams represents the parameters for creating embeddings +@[params] +pub struct CreateEmbeddingParams { pub mut: - embedding []f64 - index int - object string + input []string @[required] // Input texts + model JinaModel @[required] // Model name + task string @[required] // Task type + type_ ?EmbeddingType // embedding type + truncate ?TruncateType // truncation type + late_chunking ?bool // Flag to determine if late chunking is applied } -// Usage represents token usage information -pub struct Usage { -pub mut: - total_tokens 
int - unit string +// create_embeddings posts the input texts to v1/embeddings; an unset late_chunking is sent as false +pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbeddingOutput { + task := task_type_from_string(params.task)! + + mut embedding_input := TextEmbeddingInput{ + input: params.input + model: params.model.to_string() + task: task + } + + if v := params.type_ { + embedding_input.type_ = v + } + + if v := params.truncate { + embedding_input.truncate = v + } + + embedding_input.late_chunking = params.late_chunking or { false } + + req := httpconnection.Request{ + method: .post + prefix: 'v1/embeddings' + dataformat: .json + data: json.encode(embedding_input) + } + + mut httpclient := j.httpclient()! + response := httpclient.post_json_str(req)! + return json.decode(ModelEmbeddingOutput, response)! +} + +pub struct HealthResponse { +pub mut: + status string + message string + healthy bool +} + +pub fn (mut j Jina) health() !HealthResponse { + req := httpconnection.Request{ + method: .get + } + + mut httpclient := j.httpclient()! + response := httpclient.send(req)! 
+ if response.code == 200 { + return HealthResponse{ + status: response.code.str() + message: '200 Service available' + healthy: true + } + } + return HealthResponse{ + status: response.code.str() + message: '${response.code} Service Unavailable' + healthy: false + } } diff --git a/lib/clients/jina/model_rank.v b/lib/clients/jina/model_rank.v deleted file mode 100644 index 0dc0e43a..00000000 --- a/lib/clients/jina/model_rank.v +++ /dev/null @@ -1,104 +0,0 @@ -module jina - -import json - -// BulkEmbeddingJobResponse represents the response from bulk embedding operations -pub struct BulkEmbeddingJobResponse { -pub mut: - job_id string - status string - model string - created_at string - completed_at string - error_message string -} - -// DownloadResultResponse represents the response for downloading bulk embedding results -pub struct DownloadResultResponse { -pub mut: - download_url string - expires_at string -} - -// MultiVectorUsage represents token usage information for multi-vector embeddings -pub struct MultiVectorUsage { -pub mut: - total_tokens int -} - -// MultiVectorEmbeddingData represents a single multi-vector embedding result -pub struct MultiVectorEmbeddingData { -pub mut: - embeddings [][]f64 - index int -} - -// ColbertModelEmbeddingsOutput represents the response from multi-vector embedding requests -pub struct ColbertModelEmbeddingsOutput { -pub mut: - model string - object string - data []MultiVectorEmbeddingData - usage MultiVectorUsage -} - -// HTTPValidationError represents a validation error response -pub struct HTTPValidationError { -pub mut: - detail []ValidationError -} - -// ValidationError represents a single validation error -pub struct ValidationError { -pub mut: - loc []string - msg string - type_ string @[json: 'type'] // 'type' is a keyword, so we need to specify the JSON name -} - -// Serialize and deserialize functions for the main request/response types - -// Serialize TextEmbeddingInput to JSON -pub fn (input TextEmbeddingInput) 
to_json() string { - return json.encode(input) -} - -// Parse JSON to TextEmbeddingInput -pub fn parse_text_embedding_input(json_str string) !TextEmbeddingInput { - return json.decode(TextEmbeddingInput, json_str) -} - -// Parse JSON to ModelEmbeddingOutput -pub fn parse_model_embedding_output(json_str string) !ModelEmbeddingOutput { - return json.decode(ModelEmbeddingOutput, json_str) -} - -// // Serialize RankAPIInput to JSON -// pub fn (input RankAPIInput) to_json() string { -// return json.encode(input) -// } - -// Parse JSON to RankingOutput -pub fn parse_ranking_output(json_str string) !RankingOutput { - return json.decode(RankingOutput, json_str) -} - -// Parse JSON to BulkEmbeddingJobResponse -pub fn parse_bulk_embedding_job_response(json_str string) !BulkEmbeddingJobResponse { - return json.decode(BulkEmbeddingJobResponse, json_str) -} - -// Parse JSON to DownloadResultResponse -pub fn parse_download_result_response(json_str string) !DownloadResultResponse { - return json.decode(DownloadResultResponse, json_str) -} - -// Parse JSON to ColbertModelEmbeddingsOutput -pub fn parse_colbert_model_embeddings_output(json_str string) !ColbertModelEmbeddingsOutput { - return json.decode(ColbertModelEmbeddingsOutput, json_str) -} - -// Parse JSON to HTTPValidationError -pub fn parse_http_validation_error(json_str string) !HTTPValidationError { - return json.decode(HTTPValidationError, json_str) -} diff --git a/lib/clients/jina/multi_vector_api.v b/lib/clients/jina/multi_vector_api.v new file mode 100644 index 00000000..8061b757 --- /dev/null +++ b/lib/clients/jina/multi_vector_api.v @@ -0,0 +1,98 @@ +module jina + +import json +import freeflowuniverse.herolib.core.httpconnection + +// Enum for available Jina multi-vector models +pub enum MultiVectorModel { + jina_colbert_v1_en // jina-colbert-v1-en +} + +// Convert the enum to a valid string +pub fn (m MultiVectorModel) to_string() string { + return match m { + .jina_colbert_v1_en { 'jina-colbert-v1-en' } + } +} + 
+// Enum for input types +pub enum MultiVectorInputType { + document // document + query // query +} + +// MultiVectorTextDoc represents a text document for a multi-vector request +pub struct MultiVectorTextDoc { +pub mut: + id ?string // Optional: ID of the document + text string @[required] // Text of the document + input_type ?MultiVectorInputType // Optional: Type of the embedding to compute, query or document +} + +// MultiVectorRequest represents the JSON request body for the /v1/multi-vector endpoint +struct MultiVectorRequest { + model string // Model name + input []MultiVectorTextDoc // Input documents + embedding_type ?[]string // Optional: Embedding type + dimensions ?int // Optional: Number of dimensions +} + +// MultiVectorResponse represents the JSON response body for the /v1/multi-vector endpoint +pub struct MultiVectorResponse { + data []Embedding // List of embeddings + usage Usage // Usage information + model string // Model name + object string // Object type as string +} + +// EmbeddingObjType represents the embeddings object in the response +pub struct EmbeddingObjType { +pub mut: + float ?[][]f64 // Optional 2D array of floats for multi-vector embeddings + base64 ?[]string // Optional array of base64 strings + binary ?[]u8 // Optional array of bytes +} + +// SEmbeddingType is a sum type to handle different embedding formats +pub type SEmbeddingType = EmbeddingObjType | []f64 | []string | []u8 + +// Embedding represents an embedding vector +pub struct Embedding { + index int // Index of the document + embeddings SEmbeddingType // Embedding vector as a sum type + object string // Object type as string +} + +// MultiVectorParams represents the parameters for a multi-vector request +@[params] +pub struct MultiVectorParams { +pub mut: + model MultiVectorModel = .jina_colbert_v1_en // Model name + input []MultiVectorTextDoc // Input documents + input_type ?MultiVectorInputType // Optional: Type of the embedding to compute, query or document + 
embedding_type ?[]string // Optional: Embedding type + dimensions ?int // Optional: Number of dimensions +} + +// create_multi_vector posts the documents to v1/multi-vector and returns the decoded response +pub fn (mut j Jina) create_multi_vector(params MultiVectorParams) !MultiVectorResponse { + request := MultiVectorRequest{ + model: params.model.to_string() + input: params.input + embedding_type: params.embedding_type + dimensions: params.dimensions + } + + req := httpconnection.Request{ + method: .post + prefix: 'v1/multi-vector' + dataformat: .json + data: json.encode(request) + } + + mut httpclient := j.httpclient()! + response := httpclient.post_json_str(req)! + // Library code must not dump the raw response body to stdout + result := json.decode(MultiVectorResponse, response)! + return result +}