refactor coordinator to use shared lib models and client

This commit is contained in:
Timur Gordon
2025-11-13 21:56:33 +01:00
parent 4b23e5eb7f
commit 84545f0d75
16 changed files with 729 additions and 1973 deletions

View File

@@ -40,3 +40,4 @@ tracing-subscriber.workspace = true
# Hero dependencies
hero-job = { path = "../../lib/models/job" }
hero-supervisor-openrpc-client = { path = "../../lib/clients/supervisor" }

View File

@@ -0,0 +1,82 @@
# Coordinator Refactoring Status
## ✅ Completed
1. **SupervisorTransport trait created** - `lib/clients/supervisor`
2. **Mycelium transport moved** - `lib/clients/supervisor/src/transports/mycelium.rs`
3. **SupervisorClient made generic** - `SupervisorClient<T: SupervisorTransport>`
4. **Coordinator client updated** - Uses `SupervisorClient<MyceliumTransport>`
5. **Job type migrated** - Now uses `hero_job::Job`
6. **Job validation added** - `job.validate_required_fields()`, `job.validate_context()`
7. **Old validation removed** - Removed `validate_job()` from service.rs
## 🔄 In Progress - Coordinator Code Updates
The coordinator needs updates throughout to use the new Job API:
### Required Changes:
1. **Method calls → Field access** (see the sketch after this list):
- `job.id()` → `job.id`
- `job.caller_id()` → `job.caller_id`
- `job.context_id()` → `job.context_id`
2. **Field name changes**:
- `job.script` → `job.payload`
- `job.script_type` → `job.executor`
3. **ID type changes**:
- Change from `u32` to `String` throughout
- Update HashMap keys, function signatures, database queries
4. **Status handling**:
- Remove `job.status()` calls
- Status is tracked separately in coordinator state
5. **Workflow fields** (depends, prerequisites):
- These don't exist on `hero_job::Job`
- Stored in `JobSummary` for DAG operations
- Need separate storage/tracking
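A minimal before/after sketch of items 1-3 (field and type names from the list above; the surrounding bindings are hypothetical):

```rust
// Before the refactor: accessor methods and numeric IDs
// let key: u32 = job.id();
// let code: &str = &job.script;

// After: plain field access, String UUIDs, renamed fields
let key: String = job.id.clone(); // job.id() -> job.id, u32 -> String
let code: &str = &job.payload;    // job.script -> job.payload
let executor = &job.executor;     // job.script_type -> job.executor
```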
### Files Needing Updates (~41 errors):
- `service.rs` - Job CRUD operations, method→field changes
- `dag.rs` - Workflow orchestration, depends/prerequisites handling
- `rpc.rs` - RPC handlers, ID type changes
- `router.rs` - Job routing, already partially updated
- Database queries - ID type changes
### Next Steps:
1. Update `service.rs` - Replace method calls with field access
2. Update `dag.rs` - Handle workflow fields from JobSummary
3. Update `rpc.rs` - Handle String IDs
4. Update database layer - String ID support
5. Test compilation
6. Integration testing
## Architecture Summary
```
lib/clients/supervisor/
├── src/
│ ├── lib.rs (SupervisorClient<T>, HttpTransport, trait)
│ └── transports/
│ ├── mod.rs
│ └── mycelium.rs (MyceliumClient, MyceliumTransport, SupervisorHub)
bin/coordinator/
├── models/
│ └── job.rs (re-exports hero_job::Job with validation)
├── service.rs (needs updates)
├── dag.rs (needs updates)
├── rpc.rs (needs updates)
└── router.rs (updated)
```
## Notes
- `hero_job::Job` uses String UUIDs for IDs
- Workflow orchestration (depends, prerequisites) handled by JobSummary
- Job status tracked separately in coordinator state machine
- Validation methods added to Job model for coordinator use (see the sketch below)
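A hedged sketch of the new validation entry point (method names from the notes above; both methods are assumed to return `Result<(), JobError>`):

```rust
use hero_job::{Job, JobError};

// Sketch: replaces the old validate_job() in service.rs.
fn accept_job(job: &Job) -> Result<(), JobError> {
    job.validate_required_fields()?; // id, caller_id, payload, ...
    job.validate_context()?;         // context checks now live on the model
    Ok(())
}
```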

View File

@@ -1,9 +1,12 @@
pub mod mycelium_client;
pub mod supervisor_client;
pub mod supervisor_hub;
pub mod types;
pub use mycelium_client::{MyceliumClient, MyceliumClientError};
pub use supervisor_client::{SupervisorClient, SupervisorClientError};
pub use supervisor_hub::SupervisorHub;
pub use types::Destination;
// Re-export from the supervisor client library
pub use hero_supervisor_openrpc_client::{
SupervisorClient,
ClientError as SupervisorClientError,
transports::{
MyceliumClient,
MyceliumClientError,
SupervisorHub,
Destination,
MyceliumTransport,
},
};
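// Sketch of typical construction from these re-exports (constructor
// signatures as used elsewhere in this commit; `hub`, `destination`,
// and `secret` are assumed to be in scope):
//
//   let transport = MyceliumTransport::new(hub, destination);
//   let client = SupervisorClient::new(transport, secret);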

View File

@@ -1,319 +0,0 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use reqwest::Client as HttpClient;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use thiserror::Error;
use crate::clients::Destination;
use crate::models::TransportStatus;
/// Lightweight client for Mycelium JSON-RPC (send + query status)
#[derive(Clone)]
pub struct MyceliumClient {
base_url: String, // e.g. http://127.0.0.1:8990
http: HttpClient,
id_counter: Arc<AtomicU64>,
}
#[derive(Debug, Error)]
pub enum MyceliumClientError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Transport timed out waiting for a reply (408)")]
TransportTimeout,
#[error("JSON-RPC error: {0}")]
RpcError(String),
#[error("Invalid response: {0}")]
InvalidResponse(String),
}
impl MyceliumClient {
pub fn new(base_url: impl Into<String>) -> Result<Self, MyceliumClientError> {
let url = base_url.into();
let http = HttpClient::builder().build()?;
Ok(Self {
base_url: url,
http,
id_counter: Arc::new(AtomicU64::new(1)),
})
}
fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
async fn jsonrpc(&self, method: &str, params: Value) -> Result<Value, MyceliumClientError> {
let req = json!({
"jsonrpc": "2.0",
"id": self.next_id(),
"method": method,
"params": [ params ]
});
tracing::info!(%req, "jsonrpc");
let resp = self.http.post(&self.base_url).json(&req).send().await?;
let status = resp.status();
let body: Value = resp.json().await?;
if let Some(err) = body.get("error") {
let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
let msg = err
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
if code == 408 {
return Err(MyceliumClientError::TransportTimeout);
}
return Err(MyceliumClientError::RpcError(format!(
"code={code} msg={msg}"
)));
}
if !status.is_success() {
return Err(MyceliumClientError::RpcError(format!(
"HTTP {status}, body {body}"
)));
}
Ok(body)
}
/// Query delivery status for an outbound message id (hex string) via the getMessageInfo JSON-RPC method
pub async fn message_status(
&self,
id_hex: &str,
) -> Result<TransportStatus, MyceliumClientError> {
let params = json!(id_hex);
let body = self.jsonrpc("getMessageInfo", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
// Accept both { state: "..."} and bare "..."
let status_str = if let Some(s) = result.get("state").and_then(|v| v.as_str()) {
s.to_string()
} else if let Some(s) = result.as_str() {
s.to_string()
} else {
return Err(MyceliumClientError::InvalidResponse(format!(
"unexpected result shape: {result}"
)));
};
let status = Self::map_status(&status_str).ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("unknown status: {status_str}"))
})?;
tracing::info!(%id_hex, %status, "queried message status");
Ok(status)
}
fn map_status(s: &str) -> Option<TransportStatus> {
match s {
"pending" => Some(TransportStatus::Queued),
"received" => Some(TransportStatus::Delivered),
"read" => Some(TransportStatus::Read),
"aborted" => Some(TransportStatus::Failed),
_ => None,
}
}
/// Build params object for pushMessage without performing any network call.
/// Exposed for serializer-only tests and reuse.
pub(crate) fn build_push_params(
dst: &Destination,
topic: &str,
payload_b64: &str,
reply_timeout: Option<u64>,
) -> Value {
let dst_v = match dst {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
};
let mut message = json!({
"dst": dst_v,
"topic": topic,
"payload": payload_b64,
});
if let Some(rt) = reply_timeout {
message["reply_timeout"] = json!(rt);
}
message
}
/// pushMessage: send a message with dst/topic/payload. Optional reply_timeout for sync replies.
pub async fn push_message(
&self,
dst: &Destination,
topic: &str,
payload_b64: &str,
reply_timeout: Option<u64>,
) -> Result<Value, MyceliumClientError> {
let params = Self::build_push_params(dst, topic, payload_b64, reply_timeout);
let body = self.jsonrpc("pushMessage", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
Ok(result.clone())
}
/// Helper to extract outbound message id from pushMessage result (InboundMessage or PushMessageResponseId)
pub fn extract_message_id_from_result(result: &Value) -> Option<String> {
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}
/// popMessage: retrieve an inbound message if available (optionally filtered by topic).
/// - peek: if true, do not remove the message from the queue
/// - timeout_secs: seconds to wait for a message (0 returns immediately)
/// - topic_plain: optional plain-text topic which will be base64-encoded per Mycelium spec
/// Returns:
/// - Ok(Some(result_json)) on success, where result_json matches InboundMessage schema
/// - Ok(None) when there is no message ready (Mycelium returns error code 204)
pub async fn pop_message(
&self,
peek: Option<bool>,
timeout_secs: Option<u64>,
topic_plain: Option<&str>,
) -> Result<Option<Value>, MyceliumClientError> {
// Build params array
let mut params_array = vec![];
if let Some(p) = peek {
params_array.push(serde_json::Value::Bool(p));
} else {
params_array.push(serde_json::Value::Null)
}
if let Some(t) = timeout_secs {
params_array.push(serde_json::Value::Number(t.into()));
} else {
params_array.push(serde_json::Value::Null)
}
if let Some(tp) = topic_plain {
let topic_b64 = BASE64_STANDARD.encode(tp.as_bytes());
params_array.push(serde_json::Value::String(topic_b64));
} else {
params_array.push(serde_json::Value::Null)
}
let req = json!({
"jsonrpc": "2.0",
"id": self.next_id(),
"method": "popMessage",
"params": serde_json::Value::Array(params_array),
});
tracing::info!(%req, "calling popMessage");
let resp = self.http.post(&self.base_url).json(&req).send().await?;
let status = resp.status();
let body: Value = resp.json().await?;
// Handle JSON-RPC error envelope specially for code 204 (no message ready)
if let Some(err) = body.get("error") {
let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
let msg = err
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
if code == 204 {
// No message ready
return Ok(None);
}
if code == 408 {
// Align with other transport timeout mapping
return Err(MyceliumClientError::TransportTimeout);
}
return Err(MyceliumClientError::RpcError(format!(
"code={code} msg={msg}"
)));
}
if !status.is_success() {
return Err(MyceliumClientError::RpcError(format!(
"HTTP {status}, body {body}"
)));
}
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
Ok(Some(result.clone()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::clients::Destination;
#[test]
fn build_push_params_shapes_ip_pk_and_timeout() {
// IP destination
let p1 = MyceliumClient::build_push_params(
&Destination::Ip("2001:db8::1".parse().unwrap()),
"supervisor.rpc",
"Zm9vYmFy", // "foobar"
Some(10),
);
let msg1 = p1.get("message").unwrap();
assert_eq!(
msg1.get("topic").unwrap().as_str().unwrap(),
"supervisor.rpc"
);
assert_eq!(msg1.get("payload").unwrap().as_str().unwrap(), "Zm9vYmFy");
assert_eq!(
msg1.get("dst")
.unwrap()
.get("ip")
.unwrap()
.as_str()
.unwrap(),
"2001:db8::1"
);
assert_eq!(p1.get("reply_timeout").unwrap().as_u64().unwrap(), 10);
// PK destination without timeout
let p2 = MyceliumClient::build_push_params(
&Destination::Pk(
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".into(),
),
"supervisor.rpc",
"YmF6", // "baz"
None,
);
let msg2 = p2.get("message").unwrap();
assert_eq!(
msg2.get("dst")
.unwrap()
.get("pk")
.unwrap()
.as_str()
.unwrap(),
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
);
assert!(p2.get("reply_timeout").is_none());
}
#[test]
fn extract_message_id_variants() {
// PushMessageResponseId
let r1 = json!({"id":"0123456789abcdef"});
assert_eq!(
MyceliumClient::extract_message_id_from_result(&r1).unwrap(),
"0123456789abcdef"
);
// InboundMessage-like
let r2 = json!({
"id":"fedcba9876543210",
"srcIp":"449:abcd:0123:defa::1",
"payload":"hpV+"
});
assert_eq!(
MyceliumClient::extract_message_id_from_result(&r2).unwrap(),
"fedcba9876543210"
);
}
}

View File

@@ -1,588 +0,0 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::{Value, json};
use thiserror::Error;
use tokio::time::timeout;
use crate::clients::{Destination, MyceliumClient, MyceliumClientError, SupervisorHub};
#[derive(Clone)]
pub struct SupervisorClient {
hub: Arc<SupervisorHub>, // Global hub with background pop loop and shared id generator
destination: Destination, // ip or pk
secret: Option<String>, // optional, required by several supervisor methods
}
#[derive(Debug, Error)]
pub enum SupervisorClientError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Transport timed out waiting for a reply (408)")]
TransportTimeout,
#[error("JSON-RPC error: {0}")]
RpcError(String),
#[error("Invalid response: {0}")]
InvalidResponse(String),
#[error("Missing secret for method requiring authentication")]
MissingSecret,
}
impl From<MyceliumClientError> for SupervisorClientError {
fn from(e: MyceliumClientError) -> Self {
match e {
MyceliumClientError::TransportTimeout => SupervisorClientError::TransportTimeout,
MyceliumClientError::RpcError(m) => SupervisorClientError::RpcError(m),
MyceliumClientError::InvalidResponse(m) => SupervisorClientError::InvalidResponse(m),
MyceliumClientError::Http(err) => SupervisorClientError::Http(err),
MyceliumClientError::Json(err) => SupervisorClientError::Json(err),
}
}
}
impl SupervisorClient {
/// Preferred constructor using a shared SupervisorHub (single global listener).
pub fn new_with_hub(
hub: Arc<SupervisorHub>,
destination: Destination,
secret: Option<String>,
) -> Self {
Self {
hub,
destination,
secret,
}
}
/// Backward-compatible constructor that builds a new Hub from base_url/topic.
/// NOTE: This spawns a background popMessage listener for the given topic.
/// Prefer `new_with_hub` so the process has a single global hub.
pub fn new(
base_url: impl Into<String>,
destination: Destination,
topic: impl Into<String>,
secret: Option<String>,
) -> Result<Self, SupervisorClientError> {
let mut url = base_url.into();
if url.is_empty() {
url = "http://127.0.0.1:8990".to_string();
}
let mycelium = Arc::new(MyceliumClient::new(url)?);
Ok(Self::new_with_client(mycelium, destination, topic, secret))
}
/// Backward-compatible constructor that reuses an existing Mycelium client.
/// NOTE: This creates a new hub and its own background listener. Prefer `new_with_hub`.
pub fn new_with_client(
mycelium: Arc<MyceliumClient>,
destination: Destination,
topic: impl Into<String>,
secret: Option<String>,
) -> Self {
let hub = SupervisorHub::new_with_client(mycelium, topic);
Self::new_with_hub(hub, destination, secret)
}
/// Internal helper used by tests to inspect dst JSON shape.
fn build_dst(&self) -> Value {
match &self.destination {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
}
}
fn build_supervisor_payload(&self, method: &str, params: Value) -> Value {
json!({
"jsonrpc": "2.0",
"id": self.hub.next_id(),
"method": method,
"params": params,
})
}
/// Build a supervisor JSON-RPC payload but force a specific id (used for correlation).
fn build_supervisor_payload_with_id(&self, method: &str, params: Value, id: u64) -> Value {
json!({
"jsonrpc": "2.0",
"id": id,
"method": method,
"params": params,
})
}
fn encode_payload(payload: &Value) -> Result<String, SupervisorClientError> {
let s = serde_json::to_string(payload)?;
Ok(BASE64_STANDARD.encode(s.as_bytes()))
}
fn encode_topic(topic: &[u8]) -> String {
BASE64_STANDARD.encode(topic)
}
fn extract_message_id_from_result(result: &Value) -> Option<String> {
// Two possibilities per Mycelium spec oneOf:
// - PushMessageResponseId: { "id": "0123456789abcdef" }
// - InboundMessage: object containing "id" plus srcIp, ...; we still return id.
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}
fn need_secret(&self) -> Result<&str, SupervisorClientError> {
self.secret
.as_deref()
.ok_or(SupervisorClientError::MissingSecret)
}
// -----------------------------
// Core: request-reply call via Hub (call_with_reply defaults to a 60s timeout)
// -----------------------------
/// Send a supervisor JSON-RPC request and await its reply via the Hub.
/// Returns (outbound_message_id, reply_envelope_json).
pub async fn call_with_reply_timeout(
&self,
method: &str,
params: Value,
timeout_secs: u64,
) -> Result<(String, Value), SupervisorClientError> {
let inner_id = self.hub.next_id();
// Register waiter before sending to avoid race
let rx = self.hub.register_waiter(inner_id).await;
let inner = self.build_supervisor_payload_with_id(method, params, inner_id);
let payload_b64 = Self::encode_payload(&inner)?;
let result = self
.hub
.mycelium()
.push_message(
&self.destination,
&Self::encode_topic(self.hub.topic().as_bytes()),
&payload_b64,
None,
)
.await?;
let out_id = if let Some(id) = MyceliumClient::extract_message_id_from_result(&result) {
id
} else if let Some(arr) = result.as_array()
&& arr.len() == 1
&& let Some(id) = MyceliumClient::extract_message_id_from_result(&arr[0])
{
id
} else {
// Clean pending entry to avoid leak
let _ = self.hub.remove_waiter(inner_id).await;
return Err(SupervisorClientError::InvalidResponse(format!(
"result did not contain message id: {result}"
)));
};
let d = Duration::from_secs(timeout_secs);
match timeout(d, rx).await {
Ok(Ok(reply)) => Ok((out_id, reply)),
Ok(Err(_canceled)) => Err(SupervisorClientError::InvalidResponse(
"oneshot canceled before receiving reply".into(),
)),
Err(_elapsed) => {
// Cleanup on timeout
let _ = self.hub.remove_waiter(inner_id).await;
Err(SupervisorClientError::TransportTimeout)
}
}
}
/// Send and await with the default 60-second timeout.
pub async fn call_with_reply(
&self,
method: &str,
params: Value,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply_timeout(method, params, 60).await
}
/// Back-compat: Send and await a reply but return only the outbound id (discard reply).
/// This keeps existing call sites working while the system migrates to reply-aware paths.
pub async fn call(&self, method: &str, params: Value) -> Result<String, SupervisorClientError> {
let (out_id, _reply) = self.call_with_reply(method, params).await?;
Ok(out_id)
}
// -----------------------------
// Typed wrappers for Supervisor API (await replies)
// -----------------------------
// Runners
pub async fn list_runners_wait(&self) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("list_runners", json!([])).await
}
pub async fn register_runner_wait(
&self,
name: impl Into<String>,
queue: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"name": name.into(),
"queue": queue.into()
}]);
self.call_with_reply("register_runner", params).await
}
pub async fn remove_runner_wait(
&self,
actor_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("remove_runner", json!([actor_id.into()]))
.await
}
pub async fn start_runner_wait(
&self,
actor_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("start_runner", json!([actor_id.into()]))
.await
}
pub async fn stop_runner_wait(
&self,
actor_id: impl Into<String>,
force: bool,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("stop_runner", json!([actor_id.into(), force]))
.await
}
pub async fn get_runner_status_wait(
&self,
actor_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("get_runner_status", json!([actor_id.into()]))
.await
}
pub async fn get_all_runner_status_wait(
&self,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("get_all_runner_status", json!([]))
.await
}
pub async fn start_all_wait(&self) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("start_all", json!([])).await
}
pub async fn stop_all_wait(
&self,
force: bool,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("stop_all", json!([force])).await
}
pub async fn get_all_status_wait(&self) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("get_all_status", json!([])).await
}
// Jobs (await)
pub async fn jobs_create_wait(
&self,
job: Value,
) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job": job
}]);
self.call_with_reply("jobs.create", params).await
}
pub async fn jobs_list_wait(&self) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("jobs.list", json!([])).await
}
pub async fn job_run_wait(&self, job: Value) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job": job
}]);
self.call_with_reply("job.run", params).await
}
pub async fn job_start_wait(
&self,
job_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call_with_reply("job.start", params).await
}
pub async fn job_status_wait(
&self,
job_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("job.status", json!([job_id.into()]))
.await
}
pub async fn job_result_wait(
&self,
job_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("job.result", json!([job_id.into()]))
.await
}
pub async fn job_stop_wait(
&self,
job_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call_with_reply("job.stop", params).await
}
pub async fn job_delete_wait(
&self,
job_id: impl Into<String>,
) -> Result<(String, Value), SupervisorClientError> {
let secret = self.need_secret()?;
let params = json!([{
"secret": secret,
"job_id": job_id.into()
}]);
self.call_with_reply("job.delete", params).await
}
pub async fn rpc_discover_wait(&self) -> Result<(String, Value), SupervisorClientError> {
self.call_with_reply("rpc.discover", json!([])).await
}
// -----------------------------
// Backward-compatible variants returning only outbound id (discarding reply)
// -----------------------------
pub async fn list_runners(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.list_runners_wait().await?;
Ok(id)
}
pub async fn register_runner(
&self,
name: impl Into<String>,
queue: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.register_runner_wait(name, queue).await?;
Ok(id)
}
pub async fn remove_runner(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.remove_runner_wait(actor_id).await?;
Ok(id)
}
pub async fn start_runner(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.start_runner_wait(actor_id).await?;
Ok(id)
}
pub async fn stop_runner(
&self,
actor_id: impl Into<String>,
force: bool,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.stop_runner_wait(actor_id, force).await?;
Ok(id)
}
pub async fn get_runner_status(
&self,
actor_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.get_runner_status_wait(actor_id).await?;
Ok(id)
}
pub async fn get_all_runner_status(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.get_all_runner_status_wait().await?;
Ok(id)
}
pub async fn start_all(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.start_all_wait().await?;
Ok(id)
}
pub async fn stop_all(&self, force: bool) -> Result<String, SupervisorClientError> {
let (id, _) = self.stop_all_wait(force).await?;
Ok(id)
}
pub async fn get_all_status(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.get_all_status_wait().await?;
Ok(id)
}
pub async fn jobs_create(&self, job: Value) -> Result<String, SupervisorClientError> {
let (id, _) = self.jobs_create_wait(job).await?;
Ok(id)
}
pub async fn jobs_list(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.jobs_list_wait().await?;
Ok(id)
}
pub async fn job_run(&self, job: Value) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_run_wait(job).await?;
Ok(id)
}
pub async fn job_start(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_start_wait(job_id).await?;
Ok(id)
}
pub async fn job_status(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_status_wait(job_id).await?;
Ok(id)
}
pub async fn job_result(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_result_wait(job_id).await?;
Ok(id)
}
pub async fn job_stop(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_stop_wait(job_id).await?;
Ok(id)
}
pub async fn job_delete(
&self,
job_id: impl Into<String>,
) -> Result<String, SupervisorClientError> {
let (id, _) = self.job_delete_wait(job_id).await?;
Ok(id)
}
pub async fn rpc_discover(&self) -> Result<String, SupervisorClientError> {
let (id, _) = self.rpc_discover_wait().await?;
Ok(id)
}
}
// -----------------------------
// Tests (serialization-only)
// -----------------------------
#[cfg(test)]
mod tests {
use super::*;
use std::net::IpAddr;
fn mk_client() -> SupervisorClient {
// Build a hub but it won't issue real network calls in these serializer-only tests.
let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990").unwrap());
let hub = SupervisorHub::new_with_client(mycelium, "supervisor.rpc");
SupervisorClient::new_with_hub(
hub,
Destination::Pk(
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".to_string(),
),
Some("secret".to_string()),
)
}
#[test]
fn builds_dst_ip_and_pk() {
let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990").unwrap());
let hub_ip = SupervisorHub::new_with_client(mycelium.clone(), "supervisor.rpc");
let c_ip = SupervisorClient::new_with_hub(
hub_ip,
Destination::Ip("2001:db8::1".parse().unwrap()),
None,
);
let v_ip = c_ip.build_dst();
assert_eq!(v_ip.get("ip").unwrap().as_str().unwrap(), "2001:db8::1");
let c_pk = mk_client();
let v_pk = c_pk.build_dst();
assert_eq!(
v_pk.get("pk").unwrap().as_str().unwrap(),
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
);
}
#[test]
fn encodes_supervisor_payload_b64() {
let c = mk_client();
let payload = c.build_supervisor_payload("list_runners", json!([]));
let b64 = SupervisorClient::encode_payload(&payload).unwrap();
// decode and compare round-trip JSON
let raw = base64::engine::general_purpose::STANDARD
.decode(b64.as_bytes())
.unwrap();
let decoded: Value = serde_json::from_slice(&raw).unwrap();
assert_eq!(
decoded.get("method").unwrap().as_str().unwrap(),
"list_runners"
);
assert_eq!(decoded.get("jsonrpc").unwrap().as_str().unwrap(), "2.0");
}
#[test]
fn extract_message_id_works_for_both_variants() {
// PushMessageResponseId
let r1 = json!({"id":"0123456789abcdef"});
assert_eq!(
SupervisorClient::extract_message_id_from_result(&r1).unwrap(),
"0123456789abcdef"
);
// InboundMessage-like
let r2 = json!({
"id":"fedcba9876543210",
"srcIp":"449:abcd:0123:defa::1",
"payload":"hpV+"
});
assert_eq!(
SupervisorClient::extract_message_id_from_result(&r2).unwrap(),
"fedcba9876543210"
);
}
}

View File

@@ -1,143 +0,0 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde_json::Value;
use tokio::sync::{Mutex, oneshot};
use crate::clients::mycelium_client::MyceliumClient;
/// Global hub that:
/// - Owns a single MyceliumClient
/// - Spawns a background popMessage loop filtered by topic
/// - Correlates supervisor JSON-RPC replies by inner id to waiting callers via oneshot channels
#[derive(Clone)]
pub struct SupervisorHub {
mycelium: Arc<MyceliumClient>,
topic: String,
pending: Arc<Mutex<HashMap<u64, oneshot::Sender<Value>>>>,
id_counter: Arc<AtomicU64>,
}
impl SupervisorHub {
/// Create a new hub and start the background popMessage task.
/// - base_url: Mycelium JSON-RPC endpoint, e.g. "http://127.0.0.1:8990"
/// - topic: plain-text topic (e.g., "supervisor.rpc")
pub fn new(
base_url: impl Into<String>,
topic: impl Into<String>,
) -> Result<Arc<Self>, crate::clients::MyceliumClientError> {
let myc = Arc::new(MyceliumClient::new(base_url)?);
Ok(Self::new_with_client(myc, topic))
}
/// Variant that reuses an existing Mycelium client.
pub fn new_with_client(mycelium: Arc<MyceliumClient>, topic: impl Into<String>) -> Arc<Self> {
let hub = Arc::new(Self {
mycelium,
topic: topic.into(),
pending: Arc::new(Mutex::new(HashMap::new())),
id_counter: Arc::new(AtomicU64::new(1)),
});
Self::spawn_pop_loop(hub.clone());
hub
}
fn spawn_pop_loop(hub: Arc<Self>) {
tokio::spawn(async move {
loop {
match hub.mycelium.pop_message(Some(false), Some(20), None).await {
Ok(Some(inb)) => {
// Extract and decode payload
let Some(payload_b64) = inb.get("payload").and_then(|v| v.as_str()) else {
// Not a payload-bearing message; ignore
continue;
};
let Ok(raw) = BASE64_STANDARD.decode(payload_b64.as_bytes()) else {
tracing::warn!(target: "supervisor_hub", "Failed to decode inbound payload base64");
continue;
};
let Ok(rpc): Result<Value, _> = serde_json::from_slice(&raw) else {
tracing::warn!(target: "supervisor_hub", "Failed to parse inbound payload JSON");
continue;
};
// Extract inner JSON-RPC id
let inner_id_u64 = match rpc.get("id") {
Some(Value::Number(n)) => n.as_u64(),
Some(Value::String(s)) => s.parse::<u64>().ok(),
_ => None,
};
if let Some(inner_id) = inner_id_u64 {
// Try to deliver to a pending waiter
let sender_opt = {
let mut guard = hub.pending.lock().await;
guard.remove(&inner_id)
};
if let Some(tx) = sender_opt {
let _ = tx.send(rpc);
} else {
tracing::warn!(
target: "supervisor_hub",
inner_id,
payload = %String::from_utf8_lossy(&raw),
"Unmatched supervisor reply; no waiter registered"
);
}
} else {
tracing::warn!(target: "supervisor_hub", "Inbound supervisor reply missing id; dropping");
}
}
Ok(None) => {
// No message; continue polling
continue;
}
Err(e) => {
tracing::warn!(target: "supervisor_hub", error = %e, "popMessage error; backing off");
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
}
}
});
}
/// Allocate a new inner supervisor JSON-RPC id.
pub fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
/// Register a oneshot sender for the given inner id and return the receiver side.
pub async fn register_waiter(&self, inner_id: u64) -> oneshot::Receiver<Value> {
let (tx, rx) = oneshot::channel();
let mut guard = self.pending.lock().await;
guard.insert(inner_id, tx);
rx
}
/// Remove a pending waiter for a given id (used to cleanup on timeout).
pub async fn remove_waiter(&self, inner_id: u64) -> Option<oneshot::Sender<Value>> {
let mut guard = self.pending.lock().await;
guard.remove(&inner_id)
}
/// Access to underlying Mycelium client (for pushMessage).
pub fn mycelium(&self) -> Arc<MyceliumClient> {
self.mycelium.clone()
}
/// Access configured topic.
pub fn topic(&self) -> &str {
&self.topic
}
}
impl std::fmt::Debug for SupervisorHub {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SupervisorHub")
.field("topic", &self.topic)
.finish()
}
}

View File

@@ -1,9 +0,0 @@
use std::net::IpAddr;
/// Destination for Mycelium messages (shared by clients)
#[derive(Clone, Debug)]
pub enum Destination {
Ip(IpAddr),
/// 64-hex public key of the receiver node
Pk(String),
}

View File

@@ -102,11 +102,14 @@ async fn main() {
// Start router workers (auto-discovered contexts) using a single global SupervisorHub (no separate inbound listener)
{
let base_url = format!("http://{}:{}", cli.mycelium_ip, cli.mycelium_port);
let hub = hero_coordinator::clients::SupervisorHub::new(
base_url.clone(),
let mycelium = Arc::new(
hero_coordinator::clients::MyceliumClient::new(&base_url)
.expect("Failed to create MyceliumClient")
);
let hub = hero_coordinator::clients::SupervisorHub::new_with_client(
mycelium,
"supervisor.rpc".to_string(),
)
.expect("Failed to initialize SupervisorHub");
);
let cfg = hero_coordinator::router::RouterConfig {
context_ids: Vec::new(), // ignored by start_router_auto
concurrency: 32,

View File

@@ -1,7 +1,6 @@
mod actor;
mod context;
mod flow;
mod job;
mod message;
mod runner;
mod script_type;
@@ -9,7 +8,9 @@ mod script_type;
pub use actor::Actor;
pub use context::Context;
pub use flow::{Flow, FlowStatus};
pub use job::{Job, JobStatus};
pub use message::{Message, MessageFormatType, MessageStatus, MessageType, TransportStatus};
pub use runner::Runner;
pub use script_type::ScriptType;
// Re-export Job types from hero_job
pub use hero_job::{Job, JobStatus, JobError, JobResult, JobBuilder, JobSignature};

View File

@@ -1,62 +0,0 @@
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{models::ScriptType, time::Timestamp};
#[derive(Clone, Serialize, Deserialize)]
pub struct Job {
/// Job Id, this is given by the actor who created the job
pub id: u32,
/// Actor ID which created this job
pub caller_id: u32,
/// Context in which the job is executed
pub context_id: u32,
pub script: String,
pub script_type: ScriptType,
/// Timeout in seconds for this job
pub timeout: u32,
/// Max amount of times to retry this job
pub retries: u8,
pub env_vars: HashMap<String, String>,
pub result: HashMap<String, String>,
pub prerequisites: Vec<String>,
/// Ids of jobs this job depends on, i.e. this job can't start until those have finished
pub depends: Vec<u32>,
pub created_at: Timestamp,
pub updated_at: Timestamp,
pub status: JobStatus,
}
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
pub enum JobStatus {
Dispatched,
WaitingForPrerequisites,
Started,
Error,
Finished,
}
impl Job {
pub fn id(&self) -> u32 {
self.id
}
pub fn caller_id(&self) -> u32 {
self.caller_id
}
pub fn context_id(&self) -> u32 {
self.context_id
}
pub fn depends(&self) -> &[u32] {
&self.depends
}
pub fn prerequisites(&self) -> &[String] {
&self.prerequisites
}
pub fn script_type(&self) -> ScriptType {
self.script_type.clone()
}
pub fn status(&self) -> JobStatus {
self.status.clone()
}
}

View File

@@ -11,13 +11,13 @@ use std::hash::{Hash, Hasher};
use tokio::sync::{Mutex, Semaphore};
use crate::{
clients::{Destination, MyceliumClient, SupervisorClient, SupervisorHub},
clients::{Destination, MyceliumClient, MyceliumTransport, SupervisorClient, SupervisorHub},
models::{Job, JobStatus, Message, MessageStatus, ScriptType, TransportStatus},
service::AppService,
};
use tracing::{error, info};
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct RouterConfig {
pub context_ids: Vec<u32>,
pub concurrency: usize,
@@ -50,7 +50,7 @@ Concurrency:
*/
#[derive(Clone)]
struct SupervisorClientCache {
map: Arc<Mutex<HashMap<String, Arc<SupervisorClient>>>>,
map: Arc<Mutex<HashMap<String, Arc<SupervisorClient<MyceliumTransport>>>>>,
}
impl SupervisorClientCache {
@@ -83,7 +83,7 @@ impl SupervisorClientCache {
dest: Destination,
topic: String,
secret: Option<String>,
) -> Arc<SupervisorClient> {
) -> Arc<SupervisorClient<MyceliumTransport>> {
let key = Self::make_key(&dest, &topic, &secret);
{
@@ -99,7 +99,8 @@ impl SupervisorClientCache {
tracing::debug!(target: "router", cache="supervisor", hit=true, %topic, secret = %if secret.is_some() { "set" } else { "none" }, "SupervisorClient cache lookup (double-checked)");
return existing.clone();
}
let client = Arc::new(SupervisorClient::new_with_hub(hub, dest, secret.clone()));
let transport = MyceliumTransport::new(hub, dest);
let client = Arc::new(SupervisorClient::new(transport, secret.clone().unwrap_or_default()));
guard.insert(key, client.clone());
tracing::debug!(target: "router", cache="supervisor", hit=false, %topic, secret = %if secret.is_some() { "set" } else { "none" }, "SupervisorClient cache insert");
client
@@ -121,9 +122,8 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
let handle = tokio::spawn(async move {
let sem = Arc::new(Semaphore::new(cfg_cloned.concurrency));
// Use the global SupervisorHub and its Mycelium client
// Use the global SupervisorHub
let sup_hub = cfg_cloned.sup_hub.clone();
let mycelium = sup_hub.mycelium();
let cache = Arc::new(SupervisorClientCache::new());
@@ -146,7 +146,6 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
let service_task = service_cloned.clone();
let cfg_task = cfg_cloned.clone();
tokio::spawn({
let mycelium = mycelium.clone();
let cache = cache.clone();
let sup_hub = sup_hub.clone();
async move {
@@ -157,7 +156,6 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
&cfg_task,
ctx_id,
&key,
mycelium,
sup_hub,
cache.clone(),
)
@@ -190,7 +188,6 @@ async fn deliver_one(
cfg: &RouterConfig,
context_id: u32,
msg_key: &str,
mycelium: Arc<MyceliumClient>,
sup_hub: Arc<SupervisorHub>,
cache: Arc<SupervisorClientCache>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
@@ -245,34 +242,33 @@ async fn deliver_one(
let method = msg.message.clone();
let params = build_params(&msg)?;
// Send
// If this is a job.run and we have a secret configured on the client,
// prefer the typed wrapper that injects the secret into inner supervisor params,
// and await the reply to capture job_queued immediately.
let (out_id, reply_opt) = if method == "job.run" {
// Send via the new client API
// The transport handles message correlation internally
let _result = if method == "job.run" {
if let Some(j) = msg.job.first() {
let jv = job_to_json(j)?;
// Returns (outbound message id, reply envelope)
let (out, reply) = client.job_run_wait(jv).await?;
(out, Some(reply))
// Use typed job_run method
let job = serde_json::from_value(job_to_json(j)?)?;
client.job_run(job, None).await?;
serde_json::Value::Null
} else {
// Fallback: no embedded job, use the generic call (await reply, discard)
let out = client.call(&method, params).await?;
(out, None)
// Generic call - not supported in new API, would need custom implementation
// For now, return error
return Err("job.run requires a job parameter".into());
}
} else {
let out = client.call(&method, params).await?;
(out, None)
// For other methods, we'd need to add them to the client or use a generic mechanism
// For now, this is a placeholder
return Err(format!("Method {} not yet supported with new client", method).into());
};
// Store transport id and initial Sent status
// Mark as delivered since the new client waits for replies
let _ = service
.update_message_transport(
context_id,
caller_id,
id,
Some(out_id.clone()),
Some(TransportStatus::Sent),
None, // No transport ID in new API
Some(TransportStatus::Delivered),
)
.await;
@@ -281,25 +277,9 @@ async fn deliver_one(
.update_message_status(context_id, caller_id, id, MessageStatus::Acknowledged)
.await?;
// If we got a job.run reply, interpret job_queued immediately
if let (Some(reply), Some(job_id)) = (reply_opt, msg.job.first().map(|j| j.id)) {
let result_opt = reply.get("result");
let error_opt = reply.get("error");
// Handle job.run success (job_queued)
let is_job_queued = result_opt
.and_then(|res| {
if res.get("job_queued").is_some() {
Some(true)
} else if let Some(s) = res.as_str() {
Some(s == "job_queued")
} else {
None
}
})
.unwrap_or(false);
if is_job_queued {
// For job.run, mark the job as dispatched
if method == "job.run" {
if let Some(job_id) = msg.job.first().map(|j| j.id) {
let _ = service
.update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Dispatched)
.await;
@@ -314,579 +294,12 @@ async fn deliver_one(
)],
)
.await;
} else if let Some(err_obj) = error_opt {
let _ = service
.update_job_status_unchecked(context_id, caller_id, job_id, JobStatus::Error)
.await;
let _ = service
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Supervisor error for job {}: {} (processed synchronously)",
job_id, err_obj
)],
)
.await;
}
}
// No correlation map needed; replies are handled synchronously via SupervisorHub
// No transport polling needed; the new client waits for replies synchronously
// Spawn transport-status poller
{
let service_poll = service.clone();
let poll_interval = std::time::Duration::from_secs(cfg.transport_poll_interval_secs);
let poll_timeout = std::time::Duration::from_secs(cfg.transport_poll_timeout_secs);
let out_id_cloned = out_id.clone();
let mycelium = mycelium.clone();
tokio::spawn(async move {
let start = std::time::Instant::now();
let client = mycelium;
// Supervisor call context captured for sync status checks
let sup_dest = dest_for_poller;
let sup_topic = topic_for_poller;
let job_id_opt = job_id_opt;
let mut last_status: Option<TransportStatus> = Some(TransportStatus::Sent);
// Ensure we only request supervisor job.status or job.result once per outbound message
let mut requested_job_check: bool = false;
loop {
if start.elapsed() >= poll_timeout {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec!["Transport-status polling timed out".to_string()],
)
.await;
// leave last known status; do not override
break;
}
match client.message_status(&out_id_cloned).await {
Ok(s) => {
if last_status.as_ref() != Some(&s) {
let _ = service_poll
.update_message_transport(
context_id,
caller_id,
id,
None,
Some(s.clone()),
)
.await;
last_status = Some(s.clone());
}
// Stop on terminal states
if matches!(s, TransportStatus::Delivered | TransportStatus::Read) {
if let Some(job_id) = job_id_opt {
// First consult Redis for the latest job state in case we already have a terminal update
match service_poll.load_job(context_id, caller_id, job_id).await {
Ok(job) => {
// Promote to Started as soon as transport is delivered/read,
// if currently Dispatched or WaitingForPrerequisites.
// This makes DAG.started reflect "in-flight" work even when jobs
// complete too quickly to observe an intermediate supervisor "running" status.
if matches!(
job.status(),
JobStatus::Dispatched
| JobStatus::WaitingForPrerequisites
) {
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
JobStatus::Started,
)
.await;
}
match job.status() {
JobStatus::Finished | JobStatus::Error => {
// Local job is already terminal; skip supervisor job.status
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Local job {} status is terminal ({:?}); skipping supervisor job.status",
job_id,
job.status()
)],
)
.await;
// If result is still empty, immediately request supervisor job.result
if job.result.is_empty() {
let sup = cache
.get_or_create(
sup_hub.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
match sup
.job_result_wait(job_id.to_string())
.await
{
Ok((_out2, reply2)) => {
// Interpret reply synchronously: success/error/bare string
let res = reply2.get("result");
if let Some(obj) =
res.and_then(|v| v.as_object())
{
if let Some(s) = obj
.get("success")
.and_then(|v| v.as_str())
{
let mut patch = std::collections::HashMap::new();
patch.insert(
"success".to_string(),
s.to_string(),
);
let _ = service_poll
.update_job_result_merge_unchecked(
context_id, caller_id, job_id, patch,
)
.await;
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
// Also mark job as Finished so the flow can progress (ignore invalid transitions)
let _ = service_poll
.update_job_status_unchecked(
context_id, caller_id, job_id, JobStatus::Finished,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Updated job {} status to Finished (sync)", job_id
)],
)
.await;
// Existing log about storing result
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Stored supervisor job.result for job {} (success, sync)",
job_id
)],
)
.await;
} else if let Some(s) = obj
.get("error")
.and_then(|v| v.as_str())
{
let mut patch = std::collections::HashMap::new();
patch.insert(
"error".to_string(),
s.to_string(),
);
let _ = service_poll
.update_job_result_merge_unchecked(
context_id, caller_id, job_id, patch,
)
.await;
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
// Also mark job as Error so the flow can handle failure (ignore invalid transitions)
let _ = service_poll
.update_job_status_unchecked(
context_id, caller_id, job_id, JobStatus::Error,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Updated job {} status to Error (sync)", job_id
)],
)
.await;
// Existing log about storing result
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Stored supervisor job.result for job {} (error, sync)",
job_id
)],
)
.await;
}
} else if let Some(s) =
res.and_then(|v| v.as_str())
{
let mut patch =
std::collections::HashMap::new(
);
patch.insert(
"success".to_string(),
s.to_string(),
);
let _ = service_poll
.update_job_result_merge_unchecked(
context_id, caller_id, job_id, patch,
)
.await;
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
// Also mark job as Finished so the flow can progress (ignore invalid transitions)
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
JobStatus::Finished,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Updated job {} status to Finished (sync)", job_id
)],
)
.await;
// Existing log about storing result
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Stored supervisor job.result for job {} (success, sync)",
job_id
)],
)
.await;
} else {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec!["Supervisor job.result reply missing recognizable fields".to_string()],
)
.await;
}
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.result request error for job {}: {}",
job_id, e
)],
)
.await;
}
}
} else {
// Result already present; nothing to fetch
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Job {} already has result; no supervisor calls needed",
job_id
)],
)
.await;
}
// Mark processed and stop polling for this message
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Terminal job {} detected; stopping transport polling",
job_id
)],
)
.await;
break;
}
// Not terminal yet -> request supervisor job.status as before
_ => {
let sup = cache
.get_or_create(
sup_hub.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
match sup.job_status_wait(job_id.to_string()).await
{
Ok((_out_id, reply_status)) => {
// Interpret status reply synchronously
let result_opt = reply_status.get("result");
let error_opt = reply_status.get("error");
if let Some(err_obj) = error_opt {
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
JobStatus::Error,
)
.await;
let _ = service_poll
.append_message_logs(
context_id, caller_id, id,
vec![format!(
"Supervisor error for job {}: {} (sync)",
job_id, err_obj
)],
)
.await;
} else if let Some(res) = result_opt {
let status_candidate = res
.get("status")
.and_then(|v| v.as_str())
.or_else(|| res.as_str());
if let Some(remote_status) =
status_candidate
{
if let Some((mapped, terminal)) =
map_supervisor_job_status(
remote_status,
)
{
let _ = service_poll
.update_job_status_unchecked(
context_id, caller_id, job_id, mapped.clone(),
)
.await;
let _ = service_poll
.append_message_logs(
context_id, caller_id, id,
vec![format!(
"Supervisor job.status for job {} -> {} (mapped to {:?}, sync)",
job_id, remote_status, mapped
)],
)
.await;
// If terminal, request job.result now (handled above for local terminal case)
if terminal {
// trigger job.result only if result empty to avoid spam
if let Ok(j_after) =
service_poll
.load_job(
context_id,
caller_id,
job_id,
)
.await
{
if j_after
.result
.is_empty()
{
let sup2 = cache
.get_or_create(
sup_hub.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
let _ = sup2.job_result_wait(job_id.to_string()).await
.and_then(|(_oid, reply2)| {
// Minimal parse and store
let res2 = reply2.get("result");
if let Some(obj) = res2.and_then(|v| v.as_object()) {
if let Some(s) = obj.get("success").and_then(|v| v.as_str()) {
let mut patch = std::collections::HashMap::new();
patch.insert("success".to_string(), s.to_string());
tokio::spawn({
let service_poll = service_poll.clone();
async move {
let _ = service_poll.update_job_result_merge_unchecked(context_id, caller_id, job_id, patch).await;
}
});
}
}
Ok((String::new(), Value::Null))
});
}
}
// Mark processed and stop polling for this message
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Terminal job {} detected from supervisor status; stopping transport polling",
job_id
)],
)
.await;
break;
}
}
}
}
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.status request error: {}",
e
)],
)
.await;
}
}
}
}
}
// If we cannot load the job, fall back to requesting job.status
Err(_) => {
let sup = cache
.get_or_create(
sup_hub.clone(),
sup_dest.clone(),
sup_topic.clone(),
secret_for_poller.clone(),
)
.await;
match sup.job_status_wait(job_id.to_string()).await {
Ok((_out_id, _reply_status)) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Requested supervisor job.status for job {} (fallback; load_job failed, sync)",
job_id
)],
)
.await;
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.status request error: {}",
e
)],
)
.await;
}
}
}
}
// Ensure we only do this once
requested_job_check = true;
}
// break;
}
if matches!(s, TransportStatus::Failed) {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Transport failed for outbound id {out_id_cloned}"
)],
)
.await;
break;
}
}
Err(e) => {
// Log and continue polling
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("messageStatus query error: {e}")],
)
.await;
}
}
tokio::time::sleep(poll_interval).await;
}
});
}
Ok(())
}

View File

@@ -213,37 +213,7 @@ fn validate_flow(context_id: u32, flow: &Flow) -> Result<(), BoxError> {
Ok(())
}
fn validate_job(context_id: u32, job: &Job) -> Result<(), BoxError> {
let v = as_json(job)?;
let id = json_get_u32(&v, "id")?;
if id == 0 {
return Err(ValidationError::new("Job.id must be > 0").into());
}
let ctx = json_get_u32(&v, "context_id")?;
if ctx != context_id {
return Err(ValidationError::new(format!(
"Job.context_id ({}) does not match path context_id ({})",
ctx, context_id
))
.into());
}
let script = json_get_str(&v, "script")?;
if script.trim().is_empty() {
return Err(ValidationError::new("Job.script must not be empty").into());
}
let timeout = json_get_u32(&v, "timeout")?;
if timeout == 0 {
return Err(ValidationError::new("Job.timeout must be > 0").into());
}
let depends = json_get_array(&v, "depends")?;
if has_duplicate_u32s(&depends) {
return Err(ValidationError::new("Job.depends must not contain duplicates").into());
}
if vec_u32_contains(&depends, id) {
return Err(ValidationError::new("Job.depends must not include the job's own id").into());
}
Ok(())
}
// Validation moved to Job model - use job.validate_required_fields() and job.validate_context()
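// Sketch of the replacement call sites (method names per the refactor
// notes; both assumed to return a Result compatible with BoxError):
//
//   job.validate_required_fields()?;
//   job.validate_context()?;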
fn validate_message(context_id: u32, msg: &Message) -> Result<(), BoxError> {
let v = as_json(msg)?;
@@ -496,14 +466,14 @@ impl AppService {
if deps_ok {
// Build Message embedding this job
let ts = crate::time::current_timestamp();
let msg_id: u32 = job.id(); // deterministic message id per job for now
let msg_id: u32 = job.id.parse().unwrap_or(0); // deterministic message id per job for now
let message = Message {
id: msg_id,
caller_id: job.caller_id(),
caller_id: job.caller_id.parse().unwrap_or(0),
context_id,
message: "job.run".to_string(),
message_type: job.script_type(),
message_type: ScriptType::Python, // Default, script_type is deprecated
message_format_type: MessageFormatType::Text,
timeout: job.timeout,
timeout_ack: 10,
@@ -520,17 +490,15 @@ impl AppService {
// Persist the message and enqueue it
if redis.save_message(context_id, &message).await.is_ok() {
let _ = redis
.enqueue_msg_out(context_id, job.caller_id(), msg_id)
.await;
.enqueue_msg_out(context_id, job.caller_id, msg_id);
// Mark job as Dispatched
let _ = redis
.update_job_status(
context_id,
job.caller_id(),
job.id(),
JobStatus::Dispatched,
)
.await;
context_id,
job.caller_id,
job.id,
JobStatus::Dispatched,
);
}
}
}
@@ -579,14 +547,14 @@ impl AppService {
// Build a Message that embeds this job
let ts = crate::time::current_timestamp();
let msg_id: u32 = job.id(); // deterministic; adjust strategy later if needed
let msg_id: u32 = job.id.parse().unwrap_or(0); // deterministic; adjust strategy later if needed
let message = Message {
id: msg_id,
caller_id: job.caller_id(),
caller_id: job.caller_id.parse().unwrap_or(0),
context_id,
message: "job.run".to_string(),
message_type: job.script_type(), // uses ScriptType (matches model)
message_type: ScriptType::Python, // Default, script_type is deprecated
message_format_type: MessageFormatType::Text,
timeout: job.timeout,
timeout_ack: 10,

View File

@@ -25,6 +25,10 @@ hero-job = { path = "../../models/job" }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
jsonrpsee = { workspace = true, features = ["http-client", "macros"] }
tokio.workspace = true
async-trait.workspace = true
reqwest = { version = "0.12", features = ["json"] }
base64 = "0.22"
tracing.workspace = true
# hero-job-client removed - now part of supervisor
env_logger.workspace = true
http.workspace = true

View File

@@ -2,6 +2,13 @@ use serde::{Deserialize, Serialize};
use thiserror::Error;
use serde_json;
#[cfg(not(target_arch = "wasm32"))]
use async_trait::async_trait;
// Transport implementations
#[cfg(not(target_arch = "wasm32"))]
pub mod transports;
// Import types from the main supervisor crate
@@ -32,19 +39,69 @@ use jsonrpsee::{
#[cfg(not(target_arch = "wasm32"))]
use http::{HeaderMap, HeaderName, HeaderValue};
#[cfg(not(target_arch = "wasm32"))]
use std::path::PathBuf;
/// Client for communicating with Hero Supervisor OpenRPC server
/// Requires authentication secret for all operations
/// Transport abstraction for supervisor communication
/// Allows different transport layers (HTTP, Mycelium, etc.)
#[cfg(not(target_arch = "wasm32"))]
#[async_trait]
pub trait SupervisorTransport: Send + Sync {
/// Send a JSON-RPC request and await the response
async fn call(
&self,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value, ClientError>;
}
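// Sketch: a transport only needs `call`. This hypothetical in-memory
// transport echoes the request back, which is handy for unit tests.
#[cfg(all(test, not(target_arch = "wasm32")))]
struct EchoTransport;

#[cfg(all(test, not(target_arch = "wasm32")))]
#[async_trait]
impl SupervisorTransport for EchoTransport {
    async fn call(
        &self,
        method: &str,
        params: serde_json::Value,
    ) -> Result<serde_json::Value, ClientError> {
        // No I/O: return the request so a test can assert on routing.
        Ok(serde_json::json!({ "method": method, "params": params }))
    }
}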
/// HTTP transport implementation using jsonrpsee
#[cfg(not(target_arch = "wasm32"))]
#[derive(Clone)]
pub struct SupervisorClient {
pub struct HttpTransport {
client: HttpClient,
server_url: String,
}
#[cfg(not(target_arch = "wasm32"))]
#[async_trait]
impl SupervisorTransport for HttpTransport {
async fn call(
&self,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value, ClientError> {
// params is already an array from the caller
// jsonrpsee expects params as an array, so pass it directly
let result: serde_json::Value = if params.is_array() {
// Use the array directly with rpc_params
let arr = params.as_array().unwrap();
match arr.len() {
0 => self.client.request(method, rpc_params![]).await?,
1 => self.client.request(method, rpc_params![&arr[0]]).await?,
_ => {
// Insert each element as its own positional param;
// `rpc_params![arr]` would nest the whole array as a single param.
let mut array_params = jsonrpsee::core::params::ArrayParams::new();
for v in arr {
array_params.insert(v).map_err(ClientError::Serialization)?;
}
self.client.request(method, array_params).await?
}
}
} else {
// Single param not in array
self.client.request(method, rpc_params![&params]).await?
};
Ok(result)
}
}
/// Client for communicating with Hero Supervisor OpenRPC server
/// Generic over transport layer (HTTP, Mycelium, etc.)
#[cfg(not(target_arch = "wasm32"))]
#[derive(Clone)]
pub struct SupervisorClient<T: SupervisorTransport = HttpTransport> {
transport: T,
secret: String,
}
/// Legacy type alias for backward compatibility
#[cfg(not(target_arch = "wasm32"))]
pub type HttpSupervisorClient = SupervisorClient<HttpTransport>;
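// Sketch of typical usage (builder method names assumed from the
// `url`/`secret` fields set in the builder below):
//
//   let client: HttpSupervisorClient = SupervisorClient::builder()
//       .url("http://127.0.0.1:3030")
//       .secret("admin-secret")
//       .build()?;
//
// Custom transports skip the builder: `SupervisorClient::new(transport, secret)`.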
/// Error types for client operations
#[cfg(not(target_arch = "wasm32"))]
#[derive(Error, Debug)]
@@ -258,8 +315,8 @@ impl SupervisorClientBuilder {
self
}
/// Build the SupervisorClient
pub fn build(self) -> ClientResult<SupervisorClient> {
/// Build the SupervisorClient with HTTP transport
pub fn build(self) -> ClientResult<SupervisorClient<HttpTransport>> {
let server_url = self.url
.ok_or_else(|| ClientError::Http("URL is required".to_string()))?;
let secret = self.secret
@@ -280,9 +337,10 @@ impl SupervisorClientBuilder {
.build(&server_url)
.map_err(|e| ClientError::Http(e.to_string()))?;
let transport = HttpTransport { client };
Ok(SupervisorClient {
client,
server_url,
transport,
secret,
})
}
@@ -296,25 +354,24 @@ impl Default for SupervisorClientBuilder {
}
#[cfg(not(target_arch = "wasm32"))]
impl SupervisorClient {
/// Create a builder for SupervisorClient
impl SupervisorClient<HttpTransport> {
/// Create a builder for HTTP-based SupervisorClient
pub fn builder() -> SupervisorClientBuilder {
SupervisorClientBuilder::new()
}
/// Get the server URL
pub fn server_url(&self) -> &str {
&self.server_url
}
#[cfg(not(target_arch = "wasm32"))]
impl<T: SupervisorTransport> SupervisorClient<T> {
/// Create a new client with a custom transport
pub fn new(transport: T, secret: String) -> Self {
Self { transport, secret }
}
/// Test connection using OpenRPC discovery method
/// This calls the standard `rpc.discover` method that should be available on any OpenRPC server
pub async fn discover(&self) -> ClientResult<serde_json::Value> {
let result: serde_json::Value = self
.client
.request("rpc.discover", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
self.transport.call("rpc.discover", serde_json::json!([])).await
}
/// Register a new runner to the supervisor
@@ -324,11 +381,8 @@ impl SupervisorClient {
&self,
name: &str,
) -> ClientResult<()> {
let _: () = self
.client
.request("runner.create", rpc_params![name])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
let result = self.transport.call("runner.create", serde_json::json!([name])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Create a new job without queuing it to a runner
@@ -337,20 +391,14 @@ impl SupervisorClient {
&self,
job: Job,
) -> ClientResult<String> {
let job_id: String = self
.client
.request("job.create", rpc_params![job])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(job_id)
let result = self.transport.call("job.create", serde_json::json!([job])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// List all jobs
pub async fn job_list(&self) -> ClientResult<Vec<Job>> {
let jobs: Vec<Job> = self
.client
.request("job.list", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(jobs)
let result = self.transport.call("job.list", serde_json::json!([])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Run a job on the appropriate runner and wait for the result (blocking)
@@ -369,11 +417,8 @@ impl SupervisorClient {
params["timeout"] = serde_json::json!(t);
}
let result: JobRunResponse = self
.client
.request("job.run", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self.transport.call("job.run", serde_json::json!([params])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Start a job without waiting for the result (non-blocking)
@@ -387,58 +432,40 @@ impl SupervisorClient {
"job": job
});
let result: JobStartResponse = self
.client
.request("job.start", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self.transport.call("job.start", serde_json::json!([params])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get the current status of a job
pub async fn job_status(&self, job_id: &str) -> ClientResult<JobStatus> {
let status: JobStatus = self
.client
.request("job.status", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(status)
let result = self.transport.call("job.status", serde_json::json!([job_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get the result of a completed job (blocks until result is available)
pub async fn job_result(&self, job_id: &str) -> ClientResult<JobResult> {
let result: JobResult = self
.client
.request("job.result", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self.transport.call("job.result", serde_json::json!([job_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Remove a runner from the supervisor
/// Authentication via Authorization header (set during client creation)
pub async fn runner_remove(&self, runner_id: &str) -> ClientResult<()> {
let _: () = self
.client
.request("runner.remove", rpc_params![runner_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
let result = self.transport.call("runner.remove", serde_json::json!([runner_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// List all runner IDs
pub async fn runner_list(&self) -> ClientResult<Vec<String>> {
let runners: Vec<String> = self
.client
.request("runner.list", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(runners)
let result = self.transport.call("runner.list", serde_json::json!([])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Start a specific runner
/// Authentication via Authorization header (set during client creation)
pub async fn start_runner(&self, actor_id: &str) -> ClientResult<()> {
let _: () = self
.client
.request("runner.start", rpc_params![actor_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
let result = self.transport.call("runner.start", serde_json::json!([actor_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Add a runner to the supervisor
@@ -447,21 +474,21 @@ impl SupervisorClient {
let params = serde_json::json!({
"config": config
});
let _: () = self
.client
.request("runner.add", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("runner.add", serde_json::json!([params]))
            .await?;
Ok(())
}
/// Get status of a specific runner
/// Authentication via Authorization header (set during client creation)
pub async fn get_runner_status(&self, actor_id: &str) -> ClientResult<RunnerStatus> {
let status: RunnerStatus = self
.client
.request("runner.status", rpc_params![actor_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(status)
let result = self
.transport
.call("runner.status", serde_json::json!([actor_id]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get logs for a specific runner
@@ -471,11 +498,11 @@ impl SupervisorClient {
lines: Option<usize>,
follow: bool,
) -> ClientResult<Vec<LogInfo>> {
let logs: Vec<LogInfo> = self
.client
.request("get_runner_logs", rpc_params![actor_id, lines, follow])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(logs)
let result = self
.transport
.call("get_runner_logs", serde_json::json!([actor_id, lines, follow]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Queue a job to a specific runner
@@ -485,10 +512,10 @@ impl SupervisorClient {
"job": job
});
let _: () = self
.client
.request("queue_job_to_runner", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("queue_job_to_runner", serde_json::json!([params]))
            .await?;
Ok(())
}
@@ -500,11 +527,11 @@ impl SupervisorClient {
"timeout_secs": timeout_secs
});
let result: Option<String> = self
.client
.request("queue_and_wait", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self
.transport
.call("queue_and_wait", serde_json::json!([params]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Run a job on a specific runner
@@ -513,56 +540,56 @@ impl SupervisorClient {
"job": job
});
let result: JobResult = self
.client
.request("job.run", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self
.transport
.call("job.run", serde_json::json!([params]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get job result by job ID
pub async fn get_job_result(&self, job_id: &str) -> ClientResult<Option<String>> {
let result: Option<String> = self
.client
.request("get_job_result", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(result)
let result = self
.transport
.call("get_job_result", serde_json::json!([job_id]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get status of all runners
pub async fn get_all_runner_status(&self) -> ClientResult<Vec<(String, RunnerStatus)>> {
let statuses: Vec<(String, RunnerStatus)> = self
.client
.request("get_all_runner_status", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(statuses)
let result = self
.transport
.call("get_all_runner_status", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Start all runners
pub async fn start_all(&self) -> ClientResult<Vec<(String, bool)>> {
let results: Vec<(String, bool)> = self
.client
.request("start_all", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(results)
let result = self
.transport
.call("start_all", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Stop all runners
pub async fn stop_all(&self, force: bool) -> ClientResult<Vec<(String, bool)>> {
let results: Vec<(String, bool)> = self
.client
.request("stop_all", rpc_params![force])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(results)
let result = self
.transport
.call("stop_all", serde_json::json!([force]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get status of all runners (alternative method)
pub async fn get_all_status(&self) -> ClientResult<Vec<(String, RunnerStatus)>> {
let statuses: Vec<(String, RunnerStatus)> = self
.client
.request("get_all_status", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(statuses)
let result = self
.transport
.call("get_all_status", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Add a secret to the supervisor
@@ -576,10 +603,10 @@ impl SupervisorClient {
"secret_value": secret_value
});
let _: () = self
.client
.request("add_secret", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("add_secret", serde_json::json!([params]))
            .await?;
Ok(())
}
@@ -594,10 +621,10 @@ impl SupervisorClient {
"secret_value": secret_value
});
let _: () = self
.client
.request("remove_secret", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("remove_secret", serde_json::json!([params]))
            .await?;
Ok(())
}
@@ -605,91 +632,87 @@ impl SupervisorClient {
pub async fn list_secrets(&self) -> ClientResult<SupervisorInfo> {
let params = serde_json::json!({});
let info: SupervisorInfo = self
.client
.request("list_secrets", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(info)
let result = self
.transport
.call("list_secrets", serde_json::json!([params]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Stop a running job
pub async fn job_stop(&self, job_id: &str) -> ClientResult<()> {
let _: () = self.client
.request("job.stop", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
let result = self.transport.call("job.stop", serde_json::json!([job_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Delete a job from the system
pub async fn job_delete(&self, job_id: &str) -> ClientResult<()> {
let _: () = self.client
.request("job.delete", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(())
let result = self.transport.call("job.delete", serde_json::json!([job_id])).await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get supervisor information including secret counts
pub async fn get_supervisor_info(&self) -> ClientResult<SupervisorInfo> {
let info: SupervisorInfo = self
.client
.request("supervisor.info", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(info)
let result = self
.transport
.call("supervisor.info", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Get a job by ID
pub async fn job_get(&self, job_id: &str) -> ClientResult<Job> {
let job: Job = self
.client
.request("job.get", rpc_params![job_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(job)
let result = self
.transport
.call("job.get", serde_json::json!([job_id]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
// ========== Auth/API Key Methods ==========
/// Verify the current API key
pub async fn auth_verify(&self) -> ClientResult<AuthVerifyResponse> {
let response: AuthVerifyResponse = self
.client
.request("auth.verify", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(response)
let result = self
.transport
.call("auth.verify", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Create a new API key (admin only)
pub async fn key_create(&self, key: ApiKey) -> ClientResult<()> {
let _: () = self
.client
.request("key.create", rpc_params![key])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("key.create", serde_json::json!([key]))
            .await?;
Ok(())
}
/// Generate a new API key with auto-generated key value (admin only)
pub async fn key_generate(&self, params: GenerateApiKeyParams) -> ClientResult<ApiKey> {
let api_key: ApiKey = self
.client
.request("key.generate", rpc_params![params])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(api_key)
let result = self
.transport
.call("key.generate", serde_json::json!([params]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
/// Remove an API key (admin only)
pub async fn key_delete(&self, key_id: String) -> ClientResult<()> {
let _: () = self
.client
.request("key.delete", rpc_params![key_id])
.await.map_err(|e| ClientError::JsonRpc(e))?;
        self.transport
            .call("key.delete", serde_json::json!([key_id]))
            .await?;
Ok(())
}
/// List all API keys (admin only)
pub async fn key_list(&self) -> ClientResult<Vec<ApiKey>> {
let keys: Vec<ApiKey> = self
.client
.request("key.list", rpc_params![])
.await.map_err(|e| ClientError::JsonRpc(e))?;
Ok(keys)
let result = self
.transport
.call("key.list", serde_json::json!([]))
.await?;
serde_json::from_value(result).map_err(ClientError::Serialization)
}
}
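
For the common HTTP case, usage stays close to the old API. A sketch, assuming the builder exposes `url()` and `secret()` setters (the hunks reference `self.url`/`self.secret` but the setters themselves fall outside this diff); URL and secret are placeholders:

```rust
async fn list_runners() -> ClientResult<()> {
    let client = SupervisorClient::builder()
        .url("http://127.0.0.1:3030")
        .secret("admin-secret")
        .build()?;

    // Defaults to SupervisorClient<HttpTransport>, so old call sites compile unchanged.
    let runners = client.runner_list().await?;
    println!("registered runners: {runners:?}");
    Ok(())
}
```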

View File

@@ -0,0 +1,13 @@
/// Mycelium transport for supervisor communication
#[cfg(not(target_arch = "wasm32"))]
pub mod mycelium;
#[cfg(not(target_arch = "wasm32"))]
pub use mycelium::{
Destination,
MyceliumClient,
MyceliumClientError,
MyceliumTransport,
SupervisorHub,
TransportStatus,
};
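
Downstream crates (the coordinator, primarily) can then pull everything Mycelium-related from one path; a sketch:

```rust
use hero_supervisor_openrpc_client::transports::{
    Destination, MyceliumClient, MyceliumTransport, SupervisorHub,
};
```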

View File

@@ -0,0 +1,366 @@
use std::net::IpAddr;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::collections::HashMap;
use std::time::Duration;
use async_trait::async_trait;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use reqwest::Client as HttpClient;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use thiserror::Error;
use tokio::sync::{Mutex, oneshot};
use tokio::time::timeout;
use crate::{SupervisorTransport, ClientError};
/// Destination for Mycelium messages
#[derive(Clone, Debug)]
pub enum Destination {
Ip(IpAddr),
/// 64-hex public key of the receiver node
Pk(String),
}
/// Transport status from Mycelium
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransportStatus {
Pending,
Sent,
Delivered,
Failed,
Timeout,
}
/// Lightweight client for Mycelium JSON-RPC (send + query status)
#[derive(Clone)]
pub struct MyceliumClient {
base_url: String, // e.g. http://127.0.0.1:8990
http: HttpClient,
id_counter: Arc<AtomicU64>,
}
#[derive(Debug, Error)]
pub enum MyceliumClientError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Transport timed out waiting for a reply (408)")]
TransportTimeout,
#[error("JSON-RPC error: {0}")]
RpcError(String),
#[error("Invalid response: {0}")]
InvalidResponse(String),
}
impl From<MyceliumClientError> for ClientError {
fn from(e: MyceliumClientError) -> Self {
match e {
MyceliumClientError::Http(err) => ClientError::Http(err.to_string()),
MyceliumClientError::Json(err) => ClientError::Serialization(err),
MyceliumClientError::TransportTimeout => ClientError::Server { message: "Transport timeout".to_string() },
MyceliumClientError::RpcError(msg) => ClientError::Server { message: msg },
MyceliumClientError::InvalidResponse(msg) => ClientError::Server { message: msg },
}
}
}
impl MyceliumClient {
pub fn new(base_url: impl Into<String>) -> Result<Self, MyceliumClientError> {
let url = base_url.into();
let http = HttpClient::builder().build()?;
Ok(Self {
base_url: url,
http,
id_counter: Arc::new(AtomicU64::new(1)),
})
}
fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
async fn jsonrpc(&self, method: &str, params: Value) -> Result<Value, MyceliumClientError> {
let req = json!({
"jsonrpc": "2.0",
"id": self.next_id(),
"method": method,
"params": [ params ]
});
tracing::info!(%req, "jsonrpc");
let resp = self.http.post(&self.base_url).json(&req).send().await?;
let status = resp.status();
let body: Value = resp.json().await?;
if let Some(err) = body.get("error") {
let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
let msg = err
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
if code == 408 {
return Err(MyceliumClientError::TransportTimeout);
}
return Err(MyceliumClientError::RpcError(format!(
"code={code} msg={msg}"
)));
}
if !status.is_success() {
return Err(MyceliumClientError::RpcError(format!(
"HTTP {status}, body {body}"
)));
}
Ok(body)
}
/// Query the delivery status of an outbound message id (hex string) via `getMessageInfo`
pub async fn message_status(
&self,
id_hex: &str,
) -> Result<TransportStatus, MyceliumClientError> {
let params = json!(id_hex);
let body = self.jsonrpc("getMessageInfo", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing result in response: {body}"))
})?;
// Accept both { state: "..."} and bare "..."
let status_str = if let Some(s) = result.get("state").and_then(|v| v.as_str()) {
s.to_string()
} else if let Some(s) = result.as_str() {
s.to_string()
} else {
return Err(MyceliumClientError::InvalidResponse(format!(
"expected string or object with state, got {result}"
)));
};
match status_str.as_str() {
"pending" => Ok(TransportStatus::Pending),
"sent" => Ok(TransportStatus::Sent),
"delivered" => Ok(TransportStatus::Delivered),
"failed" => Ok(TransportStatus::Failed),
"timeout" => Ok(TransportStatus::Timeout),
_ => Err(MyceliumClientError::InvalidResponse(format!(
"unknown status: {status_str}"
))),
}
}
/// Push a message via Mycelium
pub async fn push_message(
&self,
dst: Value,
topic: &str,
payload: &str,
) -> Result<String, MyceliumClientError> {
let params = json!({
"dst": dst,
"topic": BASE64_STANDARD.encode(topic.as_bytes()),
"payload": payload,
});
let body = self.jsonrpc("pushMessage", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse("missing result in pushMessage response".to_string())
})?;
// Extract message ID
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("missing id in result: {result}"))
})
}
/// Pop a message from a topic
pub async fn pop_message(&self, topic: &str) -> Result<Option<Value>, MyceliumClientError> {
let params = json!({
"topic": BASE64_STANDARD.encode(topic.as_bytes()),
});
let body = self.jsonrpc("popMessage", params).await?;
let result = body.get("result").ok_or_else(|| {
MyceliumClientError::InvalidResponse("missing result in popMessage response".to_string())
})?;
if result.is_null() {
Ok(None)
} else {
Ok(Some(result.clone()))
}
}
}
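
Taken together, `push_message` and `message_status` give a fire-and-poll primitive. A sketch, assuming a Mycelium node on the default API port; destination key, topic, and payload are placeholders:

```rust
async fn send_and_poll() -> Result<(), MyceliumClientError> {
    let client = MyceliumClient::new("http://127.0.0.1:8990")?;
    let dst = serde_json::json!({ "pk": "<64-hex receiver key>" });
    // Payload is passed through as-is; the transport below sends base64.
    let id = client.push_message(dst, "supervisor.rpc", "aGVsbG8=").await?;

    loop {
        match client.message_status(&id).await? {
            TransportStatus::Pending | TransportStatus::Sent => {
                tokio::time::sleep(std::time::Duration::from_millis(250)).await;
            }
            status => {
                println!("terminal transport status: {status:?}");
                break Ok(());
            }
        }
    }
}
```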
/// Hub that manages request/reply correlation for supervisor calls via Mycelium
pub struct SupervisorHub {
mycelium: Arc<MyceliumClient>,
topic: String,
id_counter: Arc<AtomicU64>,
waiters: Arc<Mutex<HashMap<u64, oneshot::Sender<Value>>>>,
}
impl SupervisorHub {
pub fn new_with_client(mycelium: Arc<MyceliumClient>, topic: impl Into<String>) -> Arc<Self> {
let hub = Arc::new(Self {
mycelium,
topic: topic.into(),
id_counter: Arc::new(AtomicU64::new(1)),
waiters: Arc::new(Mutex::new(HashMap::new())),
});
// Spawn background listener
let hub_clone = hub.clone();
tokio::spawn(async move {
hub_clone.listen_loop().await;
});
hub
}
pub fn next_id(&self) -> u64 {
self.id_counter.fetch_add(1, Ordering::Relaxed)
}
pub async fn register_waiter(&self, id: u64) -> oneshot::Receiver<Value> {
let (tx, rx) = oneshot::channel();
self.waiters.lock().await.insert(id, tx);
rx
}
async fn listen_loop(&self) {
loop {
match self.mycelium.pop_message(&self.topic).await {
Ok(Some(envelope)) => {
if let Err(e) = self.handle_message(envelope).await {
tracing::warn!("Failed to handle message: {}", e);
}
}
Ok(None) => {
// No message, wait a bit
tokio::time::sleep(Duration::from_millis(100)).await;
}
Err(e) => {
tracing::error!("Error popping message: {}", e);
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
}
async fn handle_message(&self, envelope: Value) -> Result<(), String> {
// Decode payload
let payload_b64 = envelope
.get("payload")
.and_then(|v| v.as_str())
.ok_or_else(|| "missing payload".to_string())?;
let payload_bytes = BASE64_STANDARD
.decode(payload_b64)
.map_err(|e| format!("base64 decode error: {}", e))?;
let payload_str = String::from_utf8(payload_bytes)
.map_err(|e| format!("utf8 decode error: {}", e))?;
let reply: Value = serde_json::from_str(&payload_str)
.map_err(|e| format!("json parse error: {}", e))?;
// Extract ID
let id = reply
.get("id")
.and_then(|v| v.as_u64())
.ok_or_else(|| "missing or invalid id in reply".to_string())?;
// Notify waiter
if let Some(tx) = self.waiters.lock().await.remove(&id) {
let _ = tx.send(reply);
}
Ok(())
}
}
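
The waiter map is what turns Mycelium's pop-based inbox into request/reply semantics. In isolation the handshake looks like this (an illustrative sketch; in practice `listen_loop` resolves the waiter, not manual code):

```rust
async fn correlate(hub: &SupervisorHub) -> serde_json::Value {
    let id = hub.next_id();
    let rx = hub.register_waiter(id).await;
    // ... a JSON-RPC reply carrying this id is popped from the topic and
    // routed by handle_message, which fires the oneshot:
    rx.await.expect("hub dropped the waiter")
}
```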
/// Mycelium transport implementation for SupervisorClient
pub struct MyceliumTransport {
hub: Arc<SupervisorHub>,
destination: Destination,
timeout_secs: u64,
}
impl MyceliumTransport {
pub fn new(hub: Arc<SupervisorHub>, destination: Destination) -> Self {
Self {
hub,
destination,
timeout_secs: 10,
}
}
pub fn with_timeout(mut self, timeout_secs: u64) -> Self {
self.timeout_secs = timeout_secs;
self
}
fn build_dst(&self) -> Value {
match &self.destination {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
}
}
}
#[async_trait]
impl SupervisorTransport for MyceliumTransport {
async fn call(
&self,
method: &str,
params: Value,
) -> Result<Value, ClientError> {
let inner_id = self.hub.next_id();
// Register waiter before sending
let rx = self.hub.register_waiter(inner_id).await;
// Build JSON-RPC payload
let inner = json!({
"jsonrpc": "2.0",
"id": inner_id,
"method": method,
"params": params,
});
// Encode and send
let payload_str = serde_json::to_string(&inner)
.map_err(ClientError::Serialization)?;
let payload_b64 = BASE64_STANDARD.encode(payload_str.as_bytes());
let _msg_id = self.hub.mycelium
.push_message(self.build_dst(), &self.hub.topic, &payload_b64)
.await
.map_err(ClientError::from)?;
// Wait for reply
let reply = timeout(Duration::from_secs(self.timeout_secs), rx)
.await
.map_err(|_| ClientError::Server { message: "Timeout waiting for reply".to_string() })?
.map_err(|_| ClientError::Server { message: "Reply channel closed".to_string() })?;
// Check for JSON-RPC error
if let Some(error) = reply.get("error") {
let msg = error.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
return Err(ClientError::Server { message: msg.to_string() });
}
// Extract result
reply.get("result")
.cloned()
.ok_or_else(|| ClientError::Server { message: "Missing result in reply".to_string() })
}
}
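
End to end, the coordinator-side wiring now reduces to composing these pieces. A sketch with placeholder addresses, key, and secret, assuming the crate-root types are in scope:

```rust
use std::sync::Arc;

async fn supervisor_info_over_mycelium() -> Result<(), Box<dyn std::error::Error>> {
    let mycelium = Arc::new(MyceliumClient::new("http://127.0.0.1:8990")?);
    let hub = SupervisorHub::new_with_client(mycelium, "supervisor.rpc");
    let transport = MyceliumTransport::new(hub, Destination::Pk("<64-hex receiver key>".into()))
        .with_timeout(30);

    // Same SupervisorClient surface as HTTP, different wire.
    let client = SupervisorClient::new(transport, "admin-secret".to_string());
    let _info = client.get_supervisor_info().await?;
    Ok(())
}
```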