Add coordinator client library, installation scripts, and new test runners

- Add coordinator client library to workspace
- Add installation documentation and heroscript
- Add new test runners for Osiris and Sal
- Update hero runner test to handle invalid heroscript errors
- Update README with installation instructions
Timur Gordon
2025-11-17 10:56:13 +01:00
parent c95be9339e
commit f66edba1d3
12 changed files with 1650 additions and 17 deletions


@@ -0,0 +1,42 @@
//! Error types for the Coordinator client
use thiserror::Error;
/// Result type for coordinator operations
pub type Result<T> = std::result::Result<T, CoordinatorError>;
/// Errors that can occur when interacting with the Coordinator
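///
/// # Example
///
/// A minimal sketch of matching on specific variants; the coordinator
/// address and actor id below are illustrative, not defaults of this crate:
///
/// ```no_run
/// use hero_coordinator_client::{CoordinatorClient, CoordinatorError};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     match client.actor_load(11001).await {
///         Ok(actor) => println!("found actor {}", actor.id),
///         Err(CoordinatorError::NotFound(msg)) => eprintln!("not found: {msg}"),
///         Err(e) => return Err(e.into()),
///     }
///     Ok(())
/// }
/// ```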
#[derive(Debug, Error)]
pub enum CoordinatorError {
/// Connection error
#[error("Connection error: {0}")]
Connection(String),
/// RPC error
#[error("RPC error: {0}")]
Rpc(String),
/// Resource already exists
#[error("Resource already exists")]
AlreadyExists,
/// Resource not found
#[error("Not found: {0}")]
NotFound(String),
/// Storage error
#[error("Storage error: {0}")]
Storage(String),
/// DAG error
#[error("DAG error: {0}")]
Dag(String),
/// Timeout error
#[error("Timeout: {0}")]
Timeout(String),
/// Serialization error
#[error("Serialization error: {0}")]
Serialization(#[from] serde_json::Error),
}


@@ -0,0 +1,327 @@
//! Hero Coordinator Client Library
//!
//! This library provides a Rust client for interacting with the Hero Coordinator JSON-RPC API.
//! It supports creating and managing actors, contexts, runners, jobs, flows, and messages.
//!
//! # Example
//!
//! ```no_run
//! use hero_coordinator_client::{CoordinatorClient, models::*};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
//!
//! // Create an actor
//! let actor = client.actor_create(ActorCreate {
//! id: 11001,
//! pubkey: "demo-pubkey".to_string(),
//! address: vec!["127.0.0.1".parse()?],
//! }).await?;
//!
//! println!("Created actor: {:?}", actor);
//! Ok(())
//! }
//! ```
pub mod models;
pub mod error;
use jsonrpsee::http_client::{HttpClient, HttpClientBuilder};
use serde_json::Value;
pub use error::{CoordinatorError, Result};
pub use models::*;
/// Client for interacting with the Hero Coordinator
#[derive(Clone)]
pub struct CoordinatorClient {
client: HttpClient,
url: String,
}
impl CoordinatorClient {
/// Create a new coordinator client
///
/// # Arguments
/// * `url` - The URL of the coordinator (e.g., "http://127.0.0.1:9652")
pub fn new(url: impl Into<String>) -> Result<Self> {
let url = url.into();
let client = HttpClientBuilder::default()
.build(&url)
.map_err(|e| CoordinatorError::Connection(e.to_string()))?;
Ok(Self { client, url })
}
/// Get the coordinator URL
pub fn url(&self) -> &str {
&self.url
}
// ==================== Actor Methods ====================
/// Create a new actor
pub async fn actor_create(&self, actor: ActorCreate) -> Result<Actor> {
let params = serde_json::json!({ "actor": actor });
self.call("actor.create", params).await
}
/// Load an existing actor
pub async fn actor_load(&self, id: u32) -> Result<Actor> {
let params = serde_json::json!({ "id": id });
self.call("actor.load", params).await
}
/// Try to create an actor, or load it if it already exists
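///
/// # Example
///
/// A minimal sketch of the idempotent create-or-load pattern; the address
/// and id are illustrative values:
///
/// ```no_run
/// use hero_coordinator_client::{CoordinatorClient, models::ActorCreate};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let actor = client.actor_create_or_load(ActorCreate {
///         id: 11001,
///         pubkey: "demo-pubkey".to_string(),
///         address: vec!["127.0.0.1".parse()?],
///     }).await?;
///     assert_eq!(actor.id, 11001);
///     Ok(())
/// }
/// ```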
pub async fn actor_create_or_load(&self, actor: ActorCreate) -> Result<Actor> {
match self.actor_create(actor.clone()).await {
Ok(a) => Ok(a),
Err(CoordinatorError::AlreadyExists | CoordinatorError::Storage(_)) => {
self.actor_load(actor.id).await
}
Err(e) => Err(e),
}
}
// ==================== Context Methods ====================
/// Create a new context
pub async fn context_create(&self, context: ContextCreate) -> Result<Context> {
let params = serde_json::json!({ "context": context });
self.call("context.create", params).await
}
/// Load an existing context
pub async fn context_load(&self, id: u32) -> Result<Context> {
let params = serde_json::json!({ "id": id });
self.call("context.load", params).await
}
/// Try to create a context, or load it if it already exists
pub async fn context_create_or_load(&self, context: ContextCreate) -> Result<Context> {
match self.context_create(context.clone()).await {
Ok(c) => Ok(c),
Err(CoordinatorError::AlreadyExists | CoordinatorError::Storage(_)) => {
self.context_load(context.id).await
}
Err(e) => Err(e),
}
}
// ==================== Runner Methods ====================
/// Create a new runner in a context
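///
/// # Example
///
/// A minimal sketch; the context id, runner id, address, and topic are
/// illustrative values, not defaults of the coordinator:
///
/// ```no_run
/// use hero_coordinator_client::{CoordinatorClient, models::{RunnerCreate, ScriptType}};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let runner = client.runner_create(2, RunnerCreate {
///         id: 3001,
///         pubkey: "runner-pubkey".to_string(),
///         address: "127.0.0.1".parse()?,
///         topic: "supervisor.rpc".to_string(),
///         script_type: ScriptType::Sal,
///         local: true,
///         secret: None,
///     }).await?;
///     println!("runner {} listens on topic {}", runner.id, runner.topic);
///     Ok(())
/// }
/// ```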
pub async fn runner_create(&self, context_id: u32, runner: RunnerCreate) -> Result<Runner> {
let params = serde_json::json!({
"context_id": context_id,
"runner": runner
});
self.call("runner.create", params).await
}
/// Load an existing runner from a context
pub async fn runner_load(&self, context_id: u32, id: u32) -> Result<Runner> {
let params = serde_json::json!({
"context_id": context_id,
"id": id
});
self.call("runner.load", params).await
}
/// Try to create a runner, or load it if it already exists
pub async fn runner_create_or_load(&self, context_id: u32, runner: RunnerCreate) -> Result<Runner> {
match self.runner_create(context_id, runner.clone()).await {
Ok(r) => Ok(r),
Err(CoordinatorError::AlreadyExists | CoordinatorError::Storage(_)) => {
self.runner_load(context_id, runner.id).await
}
Err(e) => Err(e),
}
}
// ==================== Job Methods ====================
/// Create a new job in a context
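///
/// # Example
///
/// A minimal sketch; the ids, timeout, and script below are illustrative:
///
/// ```no_run
/// use std::collections::HashMap;
/// use hero_coordinator_client::{CoordinatorClient, models::{JobCreate, ScriptType}};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let _job = client.job_create(2, JobCreate {
///         id: 1,
///         caller_id: 11001,
///         context_id: 2,
///         script: "print('hello')".to_string(),
///         script_type: ScriptType::Python,
///         timeout: 60,
///         retries: 0,
///         env_vars: HashMap::new(),
///         prerequisites: Vec::new(),
///         depends: Vec::new(),
///     }).await?;
///     Ok(())
/// }
/// ```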
pub async fn job_create(&self, context_id: u32, job: JobCreate) -> Result<Job> {
let params = serde_json::json!({
"context_id": context_id,
"job": job
});
self.call("job.create", params).await
}
/// Load an existing job from a context
pub async fn job_load(&self, context_id: u32, caller_id: u32, id: u32) -> Result<Job> {
let params = serde_json::json!({
"context_id": context_id,
"caller_id": caller_id,
"id": id
});
self.call("job.load", params).await
}
/// Try to create a job, or load it if it already exists
pub async fn job_create_or_load(&self, context_id: u32, job: JobCreate) -> Result<Job> {
let caller_id = job.caller_id;
let job_id = job.id;
match self.job_create(context_id, job).await {
Ok(j) => Ok(j),
Err(CoordinatorError::AlreadyExists | CoordinatorError::Storage(_)) => {
self.job_load(context_id, caller_id, job_id).await
}
Err(e) => Err(e),
}
}
// ==================== Flow Methods ====================
/// Create a new flow in a context
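///
/// # Example
///
/// A minimal sketch that groups previously created job ids into a flow;
/// all ids are illustrative:
///
/// ```no_run
/// use std::collections::HashMap;
/// use hero_coordinator_client::{CoordinatorClient, models::FlowCreate};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let flow = client.flow_create(2, FlowCreate {
///         id: 1,
///         caller_id: 11001,
///         context_id: 2,
///         jobs: vec![1, 2],
///         env_vars: HashMap::new(),
///     }).await?;
///     println!("flow {} has {} jobs", flow.id, flow.jobs.len());
///     Ok(())
/// }
/// ```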
pub async fn flow_create(&self, context_id: u32, flow: FlowCreate) -> Result<Flow> {
let params = serde_json::json!({
"context_id": context_id,
"flow": flow
});
self.call("flow.create", params).await
}
/// Load an existing flow from a context
pub async fn flow_load(&self, context_id: u32, id: u32) -> Result<Flow> {
let params = serde_json::json!({
"context_id": context_id,
"id": id
});
self.call("flow.load", params).await
}
/// Try to create a flow, or load it if it already exists
pub async fn flow_create_or_load(&self, context_id: u32, flow: FlowCreate) -> Result<Flow> {
let flow_id = flow.id;
match self.flow_create(context_id, flow).await {
Ok(f) => Ok(f),
Err(CoordinatorError::AlreadyExists | CoordinatorError::Storage(_)) => {
self.flow_load(context_id, flow_id).await
}
Err(e) => Err(e),
}
}
/// Get the DAG representation of a flow
pub async fn flow_dag(&self, context_id: u32, id: u32) -> Result<FlowDag> {
let params = serde_json::json!({
"context_id": context_id,
"id": id
});
self.call("flow.dag", params).await
}
/// Start a flow
///
/// Returns true if the scheduler was started, false if it was already running
pub async fn flow_start(&self, context_id: u32, id: u32) -> Result<bool> {
let params = serde_json::json!({
"context_id": context_id,
"id": id
});
self.call("flow.start", params).await
}
// ==================== Message Methods ====================
/// Create a new message in a context
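///
/// # Example
///
/// A minimal sketch of creating a job-run message; ids and payload are
/// illustrative:
///
/// ```no_run
/// use hero_coordinator_client::{CoordinatorClient, models::{MessageCreate, MessageType, MessageFormatType}};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let message = client.message_create(2, MessageCreate {
///         id: 1,
///         context_id: 2,
///         runner_id: 3001,
///         job_id: 1,
///         message_type: MessageType::JobRun,
///         format: MessageFormatType::JsonRpc,
///         payload: "{}".to_string(),
///     }).await?;
///     println!("message {} status: {:?}", message.id, message.status);
///     Ok(())
/// }
/// ```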
pub async fn message_create(&self, context_id: u32, message: MessageCreate) -> Result<Message> {
let params = serde_json::json!({
"context_id": context_id,
"message": message
});
self.call("message.create", params).await
}
/// Load an existing message from a context
pub async fn message_load(&self, context_id: u32, id: u32) -> Result<Message> {
let params = serde_json::json!({
"context_id": context_id,
"id": id
});
self.call("message.load", params).await
}
// ==================== Helper Methods ====================
/// Poll a flow until it reaches a terminal state (Finished or Error)
///
/// # Arguments
/// * `context_id` - The context ID
/// * `flow_id` - The flow ID
/// * `poll_interval` - Duration between polls
/// * `timeout` - Maximum duration to wait
///
/// Returns the final flow state or an error if timeout is reached
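///
/// # Example
///
/// A minimal sketch polling every second with a five-minute cap; the context
/// and flow ids are illustrative:
///
/// ```no_run
/// use std::time::Duration;
/// use hero_coordinator_client::{CoordinatorClient, models::FlowStatus};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let flow = client
///         .flow_poll_until_complete(2, 1, Duration::from_secs(1), Duration::from_secs(300))
///         .await?;
///     match flow.status {
///         FlowStatus::Finished => println!("flow finished: {:?}", flow.result),
///         FlowStatus::Error => eprintln!("flow failed"),
///         _ => unreachable!("only terminal states are returned"),
///     }
///     Ok(())
/// }
/// ```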
pub async fn flow_poll_until_complete(
&self,
context_id: u32,
flow_id: u32,
poll_interval: std::time::Duration,
timeout: std::time::Duration,
) -> Result<Flow> {
let start = std::time::Instant::now();
loop {
let flow = self.flow_load(context_id, flow_id).await?;
match flow.status {
FlowStatus::Finished | FlowStatus::Error => {
return Ok(flow);
}
_ => {
if start.elapsed() > timeout {
return Err(CoordinatorError::Timeout(format!(
"Flow {} did not complete within {:?}",
flow_id, timeout
)));
}
tokio::time::sleep(poll_interval).await;
}
}
}
}
// ==================== Internal Methods ====================
async fn call<T: serde::de::DeserializeOwned>(&self, method: &str, params: Value) -> Result<T> {
use jsonrpsee::core::client::ClientT;
use jsonrpsee::core::params::ArrayParams;
let mut array_params = ArrayParams::new();
array_params.insert(params).map_err(|e| CoordinatorError::Rpc(e.to_string()))?;
self.client
.request(method, array_params)
.await
.map_err(|e| {
let err_str = e.to_string();
if err_str.contains("Already exists") {
CoordinatorError::AlreadyExists
} else if err_str.contains("Not Found") || err_str.contains("Key not found") {
CoordinatorError::NotFound(err_str)
} else if err_str.contains("Storage Error") {
CoordinatorError::Storage(err_str)
} else if err_str.contains("DAG") {
CoordinatorError::Dag(err_str)
} else {
CoordinatorError::Rpc(err_str)
}
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_client_creation() {
let client = CoordinatorClient::new("http://127.0.0.1:9652");
assert!(client.is_ok());
}
}


@@ -0,0 +1,248 @@
//! Data models for the Coordinator client
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::net::IpAddr;
// Re-export Job types from hero_job
pub use hero_job::{Job, JobStatus};
/// Timestamp type (Unix timestamp in seconds)
pub type Timestamp = u64;
// ==================== Actor ====================
/// Actor representation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Actor {
pub id: u32,
pub pubkey: String,
pub address: Vec<IpAddr>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
/// Parameters for creating an actor
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActorCreate {
pub id: u32,
pub pubkey: String,
pub address: Vec<IpAddr>,
}
// ==================== Context ====================
/// Context representation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Context {
/// Redis DB to use
pub id: u32,
/// Actor ids which have admin rights on this context
pub admins: Vec<u32>,
/// Actor ids which can read the context info
pub readers: Vec<u32>,
/// Actor ids which can execute jobs in this context
pub executors: Vec<u32>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
/// Parameters for creating a context
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextCreate {
pub id: u32,
pub admins: Vec<u32>,
pub readers: Vec<u32>,
pub executors: Vec<u32>,
}
// ==================== Runner ====================
/// Runner representation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Runner {
pub id: u32,
/// Mycelium public key
pub pubkey: String,
/// Mycelium address
pub address: IpAddr,
/// Message topic (e.g., "supervisor.rpc")
pub topic: String,
/// Script type this runner supports
pub script_type: ScriptType,
/// If true, the runner also listens on a local redis queue
pub local: bool,
/// Optional secret for authenticated supervisor calls
#[serde(skip_serializing_if = "Option::is_none")]
pub secret: Option<String>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
/// Parameters for creating a runner
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunnerCreate {
pub id: u32,
pub pubkey: String,
pub address: IpAddr,
pub topic: String,
pub script_type: ScriptType,
pub local: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub secret: Option<String>,
}
/// Script type supported by a runner
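///
/// # Example
///
/// There are no serde rename attributes, so variants serialize as their
/// bare names:
///
/// ```
/// use hero_coordinator_client::models::ScriptType;
///
/// assert_eq!(serde_json::to_string(&ScriptType::Sal).unwrap(), "\"Sal\"");
/// ```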
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScriptType {
Python,
V,
Osis,
Sal,
}
// ==================== Job ====================
/// Parameters for creating a job
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobCreate {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub script: String,
pub script_type: ScriptType,
pub timeout: u64,
#[serde(default)]
pub retries: u8,
#[serde(default)]
pub env_vars: HashMap<String, String>,
#[serde(default)]
pub prerequisites: Vec<u32>,
#[serde(default)]
pub depends: Vec<u32>,
}
// ==================== Flow ====================
/// Flow representation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Flow {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub jobs: Vec<u32>,
pub env_vars: HashMap<String, String>,
pub result: HashMap<String, String>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
pub status: FlowStatus,
}
/// Parameters for creating a flow
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowCreate {
pub id: u32,
pub caller_id: u32,
pub context_id: u32,
pub jobs: Vec<u32>,
#[serde(default)]
pub env_vars: HashMap<String, String>,
}
/// Flow status
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FlowStatus {
Created,
Dispatched,
Started,
Error,
Finished,
}
/// DAG representation of a flow
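///
/// # Example
///
/// A minimal sketch of walking a DAG fetched through the client; the context
/// and flow ids are illustrative:
///
/// ```no_run
/// use hero_coordinator_client::CoordinatorClient;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let client = CoordinatorClient::new("http://127.0.0.1:9652")?;
///     let dag = client.flow_dag(2, 1).await?;
///     for edge in &dag.edges {
///         println!("dependency edge: {} -> {}", edge.from, edge.to);
///     }
///     for node in &dag.nodes {
///         println!("job {} is {:?}", node.job_id, node.status);
///     }
///     Ok(())
/// }
/// ```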
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowDag {
pub flow_id: u32,
pub nodes: Vec<DagNode>,
pub edges: Vec<DagEdge>,
}
/// Node in a DAG
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DagNode {
pub job_id: u32,
pub status: JobStatus,
#[serde(skip_serializing_if = "Option::is_none")]
pub result: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}
/// Edge in a DAG (dependency relationship)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DagEdge {
pub from: u32,
pub to: u32,
}
// ==================== Message ====================
/// Message representation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub id: u32,
pub context_id: u32,
pub runner_id: u32,
pub job_id: u32,
pub message_type: MessageType,
pub format: MessageFormatType,
pub payload: String,
pub status: MessageStatus,
pub transport_status: TransportStatus,
pub created_at: Timestamp,
pub updated_at: Timestamp,
}
/// Parameters for creating a message
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MessageCreate {
pub id: u32,
pub context_id: u32,
pub runner_id: u32,
pub job_id: u32,
pub message_type: MessageType,
pub format: MessageFormatType,
pub payload: String,
}
/// Message type
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MessageType {
JobRun,
JobResult,
JobError,
}
/// Message format
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MessageFormatType {
JsonRpc,
}
/// Message status
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MessageStatus {
Created,
Sent,
Delivered,
Failed,
}
/// Transport status
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TransportStatus {
Pending,
Sent,
Delivered,
Failed,
}