Merge branch 'development_action007_mahmoud' into development_actions007

2025-03-13 07:15:40 +01:00
parent e69d67d238 f9ea731a6e
commit 9c09af83a2
6 changed files with 270 additions and 210 deletions
--- a/examples/clients/jina.vsh
+++ b/examples/clients/jina.vsh
@@ -3,6 +3,8 @@
 import freeflowuniverse.herolib.clients.jina

 mut jina_client := jina.get()!
+health := jina_client.health()!
+println('Server health: ${health}')

 // Create embeddings
 embeddings := jina_client.create_embeddings(
@@ -56,5 +58,29 @@ classify_result := jina_client.classify(

 println('Classification result: ${classify_result}')

+// List classifiers
 classifiers := jina_client.list_classifiers() or { panic('Error fetching classifiers: ${err}') }
 println('Classifiers: ${classifiers}')
+
+// Delete classifier
+delete_result := jina_client.delete_classifier(classifier_id: classifiers[0].classifier_id) or {
+	panic('Error deleting classifier: ${err}')
+}
+println('Delete result: ${delete_result}')
+
+// Create multi vector
+multi_vector := jina_client.create_multi_vector(
+	input:          [
+		jina.MultiVectorTextDoc{
+			text:       'Hello world'
+			input_type: .document
+		},
+		jina.MultiVectorTextDoc{
+			text:       "What's up?"
+			input_type: .query
+		},
+	]
+	embedding_type: ['float']
+	// dimensions:     96
+)!
+println('Multi vector: ${multi_vector}')
--- a/lib/clients/jina/jina_client_test.v
+++ b/lib/clients/jina/jina_client_test.v
@@ -85,3 +85,18 @@ fn test_get_classifiers() {
 	classifiers := client.list_classifiers() or { panic('Error fetching classifiers: ${err}') }
 	assert classifiers.len != 0
 }
+
+// Delete classifier
+fn test_delete_classifiers() {
+	time.sleep(1 * time.second)
+	mut client := setup_client()!
+
+	classifiers := client.list_classifiers() or { panic('Error fetching classifiers: ${err}') }
+	assert classifiers.len != 0
+
+	delete_result := client.delete_classifier(classifier_id: classifiers[0].classifier_id) or {
+		panic('Error deleting classifier: ${err}')
+	}
+
+	assert delete_result == '{"message":"Classifier ${classifiers[0].classifier_id} deleted"}'
+}
--- a/lib/clients/jina/classification_api.v
+++ b/lib/clients/jina/classification_api.v
@@ -285,7 +285,25 @@ pub fn (mut j Jina) list_classifiers() ![]Classifier {

 	mut httpclient := j.httpclient()!
 	response := httpclient.get(req)!
-	println('response: ${response}')
 	classifiers := json.decode([]Classifier, response)!
 	return classifiers
 }
+
+// DeleteClassifierParams represents the parameters for deleting a classifier
+@[params]
+pub struct DeleteClassifierParams {
+pub mut:
+	classifier_id string @[required] // The ID of the classifier to delete
+}
+
+// delete_classifier issues a DELETE for v1/classifiers/<classifier_id> and
+// returns the string produced by the HTTP client's delete call, undecoded.
+pub fn (mut j Jina) delete_classifier(params DeleteClassifierParams) !string {
+	mut conn := j.httpclient()!
+	endpoint := 'v1/classifiers/${params.classifier_id}'
+	req := httpconnection.Request{
+		method: .delete
+		prefix: endpoint
+	}
+	return conn.delete(req)!
+}
--- a/lib/clients/jina/embeddings_api.v
+++ b/lib/clients/jina/embeddings_api.v
@@ -1,5 +1,6 @@
 module jina

+import freeflowuniverse.herolib.core.httpconnection
 import json

 // JinaModel represents the available Jina models
@@ -43,6 +44,32 @@ pub fn jina_model_from_string(s string) !JinaModel {
 	}
 }

+// TruncateType represents the available truncation options
+pub enum TruncateType {
+	none_ // "NONE"
+	start // "START"
+	end   // "END"
+}
+
+// to_string converts TruncateType enum to its string representation
+pub fn (t TruncateType) to_string() string {
+	return match t {
+		.none_ { 'NONE' }
+		.start { 'START' }
+		.end { 'END' }
+	}
+}
+
+// from_string converts string to TruncateType enum
+pub fn truncate_type_from_string(s string) !TruncateType {
+	return match s {
+		'NONE' { TruncateType.none_ }
+		'START' { TruncateType.start }
+		'END' { TruncateType.end }
+		else { error('Invalid truncate type string: ${s}') }
+	}
+}
+
 // EmbeddingType represents the available embedding types
 pub enum EmbeddingType {
 	float   // "float"
@@ -81,17 +108,6 @@ pub enum TaskType {
 	separation        // "separation"
 }

-// to_string converts TaskType enum to its string representation
-pub fn (t TaskType) to_string() string {
-	return match t {
-		.retrieval_query { 'retrieval.query' }
-		.retrieval_passage { 'retrieval.passage' }
-		.text_matching { 'text-matching' }
-		.classification { 'classification' }
-		.separation { 'separation' }
-	}
-}
-
 // from_string converts string to TaskType enum
 pub fn task_type_from_string(s string) !TaskType {
 	return match s {
@@ -104,41 +120,22 @@ pub fn task_type_from_string(s string) !TaskType {
 	}
 }

-// TruncateType represents the available truncation options
-pub enum TruncateType {
-	none_ // "NONE"
-	start // "START"
-	end   // "END"
-}
-
-// to_string converts TruncateType enum to its string representation
-pub fn (t TruncateType) to_string() string {
+// to_string converts TaskType enum to its string representation
+pub fn (t TaskType) to_string() string {
 	return match t {
-		.none_ { 'NONE' }
-		.start { 'START' }
-		.end { 'END' }
+		.retrieval_query { 'retrieval.query' }
+		.retrieval_passage { 'retrieval.passage' }
+		.text_matching { 'text-matching' }
+		.classification { 'classification' }
+		.separation { 'separation' }
 	}
 }

-// from_string converts string to TruncateType enum
-pub fn truncate_type_from_string(s string) !TruncateType {
-	return match s {
-		'NONE' { TruncateType.none_ }
-		'START' { TruncateType.start }
-		'END' { TruncateType.end }
-		else { error('Invalid truncate type string: ${s}') }
-	}
-}
-
-// TextEmbeddingInputRaw represents the raw input for text embedding requests as sent to the server
-struct TextEmbeddingInputRaw {
-mut:
-	model         string = 'jina-embeddings-v2-base-en'
-	input         []string @[required]
-	task          string // Optional: task type as string
-	type_         string @[json: 'type'] // Optional: embedding type as string
-	truncate      string // Optional: "NONE", "START", "END"
-	late_chunking bool   // Optional: Flag to determine if late chunking is applied
+// Usage represents token usage information
+pub struct Usage {
+pub mut:
+	total_tokens int
+	unit         string
 }

 // TextEmbeddingInput represents the input for text embedding requests with enum types
@@ -152,61 +149,14 @@ pub mut:
 	late_chunking ?bool          // Flag to determine if late chunking is applied
 }

-// dumps converts TextEmbeddingInput to JSON string
-pub fn (t TextEmbeddingInput) dumps() !string {
-	mut raw := TextEmbeddingInputRaw{
-		model:         t.model
-		input:         t.input
-		late_chunking: if v := t.late_chunking { true } else { false }
-	}
-
-	raw.task = t.task.to_string()
-	if v := t.type_ {
-		raw.type_ = v.to_string()
-	}
-
-	if v := t.truncate {
-		raw.truncate = v.to_string()
-	}
-
-	return json.encode(raw)
+// EmbeddingData represents a single embedding result
+pub struct EmbeddingData {
+pub mut:
+	embedding []f64
+	index     int
+	object    string
 }

-// from_raw converts TextEmbeddingInputRaw to TextEmbeddingInput
-// pub fn loads_text_embedding_input(text string) !TextEmbeddingInput {
-// 	// TODO: go from text to InputObject over json
-// 	// mut input := TextEmbeddingInput{
-// 	// 	model:         jina_model_from_string(raw.model)?
-// 	// 	input:         raw.input
-// 	// 	late_chunking: raw.late_chunking
-// 	// }
-
-// 	// if raw.task != '' {
-// 	// 	input.task = task_type_from_string(raw.task)!
-// 	// }
-
-// 	// if raw.type_ != '' {
-// 	// 	input.type_ = embedding_type_from_string(raw.type_)!
-// 	// }
-
-// 	// if raw.truncate != '' {
-// 	// 	input.truncate = truncate_type_from_string(raw.truncate)!
-// 	// }
-
-// 	return TextEmbeddingInput{}
-// }
-
-// loads converts a JSON string to TextEmbeddingInput
-// pub fn loads(text string) !TextEmbeddingInput {
-// 	// First decode the JSON string to the raw struct
-// 	raw := json.decode(TextEmbeddingInputRaw, text) or {
-// 		return error('Failed to decode JSON: ${err}')
-// 	}
-
-// 	// Then convert the raw struct to the typed struct
-// 	return text_embedding_input_from_raw(raw)
-// }
-
 // ModelEmbeddingOutput represents the response from embedding requests
 pub struct ModelEmbeddingOutput {
 pub mut:
@@ -217,17 +167,74 @@ pub mut:
 	dimension int
 }

-// EmbeddingData represents a single embedding result
-pub struct EmbeddingData {
+// CreateEmbeddingParams represents the parameters for creating embeddings
+@[params]
+pub struct CreateEmbeddingParams {
 pub mut:
-	embedding []f64
-	index     int
-	object    string
+	input         []string  @[required] // Input texts
+	model         JinaModel @[required] // Model name
+	task          string    @[required] // Task type
+	type_         ?EmbeddingType // embedding type
+	truncate      ?TruncateType  // truncation type
+	late_chunking ?bool          // Flag to determine if late chunking is applied
 }

-// Usage represents token usage information
-pub struct Usage {
-pub mut:
-	total_tokens int
-	unit         string
+// create_embeddings posts the input texts to v1/embeddings and decodes the reply.
+pub fn (mut j Jina) create_embeddings(params CreateEmbeddingParams) !ModelEmbeddingOutput {
+	task := task_type_from_string(params.task)!
+
+	mut embedding_input := TextEmbeddingInput{
+		input: params.input
+		model: params.model.to_string()
+		task:  task
+	}
+
+	if v := params.type_ {
+		embedding_input.type_ = v
+	}
+
+	if v := params.truncate {
+		embedding_input.truncate = v
+	}
+	// Forward the option untouched: an explicit false must not be rewritten to true.
+	embedding_input.late_chunking = params.late_chunking
+
+	req := httpconnection.Request{
+		method:     .post
+		prefix:     'v1/embeddings'
+		dataformat: .json
+		data:       json.encode(embedding_input)
+	}
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
+	return json.decode(ModelEmbeddingOutput, response)!
+}
+
+pub struct HealthResponse {
+pub mut:
+	status  string
+	message string
+	healthy bool
+}
+
+// health sends a bare GET (no prefix) and maps the status code to a report:
+// code 200 is healthy, every other code is flagged as unavailable.
+pub fn (mut j Jina) health() !HealthResponse {
+	mut conn := j.httpclient()!
+	probe := httpconnection.Request{
+		method: .get
+	}
+	reply := conn.send(probe)!
+	is_up := reply.code == 200
+	summary := if is_up {
+		'200 Service available'
+	} else {
+		'${reply.code} Service Unavailable'
+	}
+	return HealthResponse{
+		status:  reply.code.str()
+		message: summary
+		healthy: is_up
+	}
 }
--- a/lib/clients/jina/model_rank.v
+++ b/lib/clients/jina/model_rank.v
@@ -1,104 +0,0 @@
-module jina
-
-import json
-
-// BulkEmbeddingJobResponse represents the response from bulk embedding operations
-pub struct BulkEmbeddingJobResponse {
-pub mut:
-	job_id        string
-	status        string
-	model         string
-	created_at    string
-	completed_at  string
-	error_message string
-}
-
-// DownloadResultResponse represents the response for downloading bulk embedding results
-pub struct DownloadResultResponse {
-pub mut:
-	download_url string
-	expires_at   string
-}
-
-// MultiVectorUsage represents token usage information for multi-vector embeddings
-pub struct MultiVectorUsage {
-pub mut:
-	total_tokens int
-}
-
-// MultiVectorEmbeddingData represents a single multi-vector embedding result
-pub struct MultiVectorEmbeddingData {
-pub mut:
-	embeddings [][]f64
-	index      int
-}
-
-// ColbertModelEmbeddingsOutput represents the response from multi-vector embedding requests
-pub struct ColbertModelEmbeddingsOutput {
-pub mut:
-	model  string
-	object string
-	data   []MultiVectorEmbeddingData
-	usage  MultiVectorUsage
-}
-
-// HTTPValidationError represents a validation error response
-pub struct HTTPValidationError {
-pub mut:
-	detail []ValidationError
-}
-
-// ValidationError represents a single validation error
-pub struct ValidationError {
-pub mut:
-	loc   []string
-	msg   string
-	type_ string @[json: 'type'] // 'type' is a keyword, so we need to specify the JSON name
-}
-
-// Serialize and deserialize functions for the main request/response types
-
-// Serialize TextEmbeddingInput to JSON
-pub fn (input TextEmbeddingInput) to_json() string {
-	return json.encode(input)
-}
-
-// Parse JSON to TextEmbeddingInput
-pub fn parse_text_embedding_input(json_str string) !TextEmbeddingInput {
-	return json.decode(TextEmbeddingInput, json_str)
-}
-
-// Parse JSON to ModelEmbeddingOutput
-pub fn parse_model_embedding_output(json_str string) !ModelEmbeddingOutput {
-	return json.decode(ModelEmbeddingOutput, json_str)
-}
-
-// // Serialize RankAPIInput to JSON
-// pub fn (input RankAPIInput) to_json() string {
-// 	return json.encode(input)
-// }
-
-// Parse JSON to RankingOutput
-pub fn parse_ranking_output(json_str string) !RankingOutput {
-	return json.decode(RankingOutput, json_str)
-}
-
-// Parse JSON to BulkEmbeddingJobResponse
-pub fn parse_bulk_embedding_job_response(json_str string) !BulkEmbeddingJobResponse {
-	return json.decode(BulkEmbeddingJobResponse, json_str)
-}
-
-// Parse JSON to DownloadResultResponse
-pub fn parse_download_result_response(json_str string) !DownloadResultResponse {
-	return json.decode(DownloadResultResponse, json_str)
-}
-
-// Parse JSON to ColbertModelEmbeddingsOutput
-pub fn parse_colbert_model_embeddings_output(json_str string) !ColbertModelEmbeddingsOutput {
-	return json.decode(ColbertModelEmbeddingsOutput, json_str)
-}
-
-// Parse JSON to HTTPValidationError
-pub fn parse_http_validation_error(json_str string) !HTTPValidationError {
-	return json.decode(HTTPValidationError, json_str)
-}
--- a/lib/clients/jina/multi_vector_api.v
+++ b/lib/clients/jina/multi_vector_api.v
@@ -0,0 +1,98 @@
+module jina
+
+import json
+import freeflowuniverse.herolib.core.httpconnection
+
+// Enum for available Jina multi-vector models
+pub enum MultiVectorModel {
+	jina_colbert_v1_en // jina-colbert-v1-en
+}
+
+// Convert the enum to a valid string
+pub fn (m MultiVectorModel) to_string() string {
+	return match m {
+		.jina_colbert_v1_en { 'jina-colbert-v1-en' }
+	}
+}
+
+// Enum for input types
+pub enum MultiVectorInputType {
+	document // document
+	query    // query
+}
+
+// MultiVectorTextDoc represents a text document for a multi-vector request
+pub struct MultiVectorTextDoc {
+pub mut:
+	id         ?string // Optional: ID of the document
+	text       string @[required] // Text of the document
+	input_type ?MultiVectorInputType // Optional: Type of the embedding to compute, query or document
+}
+
+// MultiVectorRequest represents the JSON request body for the /v1/multi-vector endpoint
+struct MultiVectorRequest {
+	model          string               // Model name
+	input          []MultiVectorTextDoc // Input documents
+	embedding_type ?[]string            // Optional: Embedding type
+	dimensions     ?int                 // Optional: Number of dimensions
+}
+
+// MultiVectorResponse represents the JSON response body for the /v1/multi-vector endpoint
+pub struct MultiVectorResponse {
+	data   []Embedding // List of embeddings
+	usage  Usage       // Usage information
+	model  string      // Model name
+	object string      // Object type as string
+}
+
+// EmbeddingObjType represents the embeddings object in the response
+pub struct EmbeddingObjType {
+pub mut:
+	float  ?[][]f64  // Optional 2D array of floats for multi-vector embeddings
+	base64 ?[]string // Optional array of base64 strings
+	binary ?[]u8     // Optional array of bytes
+}
+
+// SEmbeddingType is a sum type to handle different embedding formats
+pub type SEmbeddingType = EmbeddingObjType | []f64 | []string | []u8
+
+// Embedding represents an embedding vector
+pub struct Embedding {
+	index      int            // Index of the document
+	embeddings SEmbeddingType // Embedding vector as a sum type
+	object     string         // Object type as string
+}
+
+// MultiVectorParams represents the parameters for a multi-vector request
+@[params]
+pub struct MultiVectorParams {
+pub mut:
+	model          MultiVectorModel = .jina_colbert_v1_en // Model name
+	input          []MultiVectorTextDoc  // Input documents
+	input_type     ?MultiVectorInputType // Optional: Type of the embedding to compute, query or document
+	embedding_type ?[]string             // Optional: Embedding type
+	dimensions     ?int                  // Optional: Number of dimensions
+}
+
+// create_multi_vector posts the documents to v1/multi-vector and decodes the reply.
+pub fn (mut j Jina) create_multi_vector(params MultiVectorParams) !MultiVectorResponse {
+	// NOTE(review): params.input_type is never forwarded; only the per-document
+	// input_type inside params.input reaches the request - confirm this is intended.
+	request := MultiVectorRequest{
+		model:          params.model.to_string()
+		input:          params.input
+		embedding_type: params.embedding_type
+		dimensions:     params.dimensions
+	}
+
+	req := httpconnection.Request{
+		method:     .post
+		prefix:     'v1/multi-vector'
+		dataformat: .json
+		data:       json.encode(request)
+	}
+
+	mut httpclient := j.httpclient()!
+	response := httpclient.post_json_str(req)!
+	return json.decode(MultiVectorResponse, response)!
+}