Add calling of the supervisor over Mycelium
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
src/router.rs (new file, 211 lines added)
@@ -0,0 +1,211 @@
use std::{collections::HashSet, sync::Arc};

use serde_json::{Value, json};
use tokio::sync::Semaphore;

use crate::{
    clients::{Destination, SupervisorClient},
    models::{Job, Message, MessageStatus, ScriptType},
    service::AppService,
};

#[derive(Clone, Debug)]
pub struct RouterConfig {
    pub context_ids: Vec<u32>,
    pub concurrency: usize,
    pub base_url: String, // e.g. http://127.0.0.1:8990
    pub topic: String,    // e.g. "supervisor.rpc"
    // secret currently unused (None), add here later if needed
}
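
// Example (illustrative only): a config pointing at a local Mycelium HTTP API.
// The values below are placeholders chosen for this sketch, not defaults:
//
//     let cfg = RouterConfig {
//         context_ids: vec![], // filled per context by start_router_auto below
//         concurrency: 32,
//         base_url: "http://127.0.0.1:8990".to_string(),
//         topic: "supervisor.rpc".to_string(),
//     };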

/// Start background router loops, one per context.
/// Each loop:
/// - BRPOP msg_out with 1s timeout
/// - Loads the Message by key, selects a Runner by script_type
/// - Sends supervisor JSON-RPC via Mycelium
/// - On success: Message.status = Acknowledged
/// - On error: Message.status = Error and append a log
pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::JoinHandle<()>> {
    let mut handles = Vec::new();
    for ctx_id in cfg.context_ids.clone() {
        let service_cloned = service.clone();
        let cfg_cloned = cfg.clone();
        let handle = tokio::spawn(async move {
            let sem = Arc::new(Semaphore::new(cfg_cloned.concurrency));
            loop {
                // Pop next message key (blocking with timeout)
                match service_cloned.brpop_msg_out(ctx_id, 1).await {
                    Ok(Some(key)) => {
                        let permit = {
                            // acquire a concurrency permit (non-fair is fine)
                            let sem = sem.clone();
                            // if the semaphore is exhausted, await until a slot becomes available
                            match sem.acquire_owned().await {
                                Ok(p) => p,
                                Err(_) => {
                                    // Semaphore closed; exit loop
                                    break;
                                }
                            }
                        };
                        let service_task = service_cloned.clone();
                        let cfg_task = cfg_cloned.clone();
                        tokio::spawn(async move {
                            // Ensure the permit is dropped at the end of the task
                            let _permit = permit;
                            if let Err(e) =
                                deliver_one(&service_task, &cfg_task, ctx_id, &key).await
                            {
                                eprintln!(
                                    "[router ctx={}] delivery error for {}: {}",
                                    ctx_id, key, e
                                );
                            }
                        });
                    }
                    Ok(None) => {
                        // timeout: just tick
                        continue;
                    }
                    Err(e) => {
                        eprintln!("[router ctx={}] brpop error: {}", ctx_id, e);
                        // small backoff to avoid busy-looping on persistent errors
                        tokio::time::sleep(std::time::Duration::from_millis(200)).await;
                    }
                }
            }
        });
        handles.push(handle);
    }
    handles
}
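
// Direct use (sketch): start loops for two already-known contexts and keep
// the handles so the caller can await or abort them later. The context ids
// 1 and 2 are placeholders for this example:
//
//     let handles = start_router(service.clone(), RouterConfig {
//         context_ids: vec![1, 2],
//         ..cfg.clone()
//     });
//     for h in handles {
//         let _ = h.await;
//     }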

async fn deliver_one(
    service: &AppService,
    cfg: &RouterConfig,
    context_id: u32,
    msg_key: &str,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Parse "message:{caller_id}:{id}"
    let (caller_id, id) = parse_message_key(msg_key)
        .ok_or_else(|| format!("invalid message key format: {}", msg_key))?;

    // Load message
    let msg: Message = service.load_message(context_id, caller_id, id).await?;

    // Determine routing script_type
    let desired: ScriptType = determine_script_type(&msg);

    // Discover runners and select a matching one
    let runners = service.scan_runners(context_id).await?;
    let Some(runner) = runners.into_iter().find(|r| r.script_type == desired) else {
        let log = format!(
            "No runner with script_type {:?} available in context {} for message {}",
            desired, context_id, msg_key
        );
        let _ = service
            .append_message_logs(context_id, caller_id, id, vec![log.clone()])
            .await;
        let _ = service
            .update_message_status(context_id, caller_id, id, MessageStatus::Error)
            .await;
        return Err(log.into());
    };

    // Build SupervisorClient; prefer the runner's public key, fall back to its IP
    let dest = if !runner.pubkey.trim().is_empty() {
        Destination::Pk(runner.pubkey.clone())
    } else {
        Destination::Ip(runner.address)
    };
    let client = SupervisorClient::new(
        cfg.base_url.clone(),
        dest,
        cfg.topic.clone(),
        None, // secret
    )?;

    // Build supervisor method and params from Message
    let method = msg.message.clone();
    let params = build_params(&msg)?;

    // Send
    let _out_id = client.call(&method, params).await?;

    // Mark as acknowledged on success
    service
        .update_message_status(context_id, caller_id, id, MessageStatus::Acknowledged)
        .await?;

    Ok(())
}
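
// For reference, a delivery of a "job.run" message is expected to go out as
// a JSON-RPC 2.0 request shaped roughly like the following. The envelope is
// assumed from the method/params mapping above, not taken from
// SupervisorClient internals:
//
//     {
//       "jsonrpc": "2.0",
//       "id": 1,
//       "method": "job.run",
//       "params": [ { "job": { /* serialized Job */ } } ]
//     }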

fn determine_script_type(msg: &Message) -> ScriptType {
    // Prefer the embedded job's script_type if available, else fall back to message.message_type
    match msg.job.first() {
        Some(j) => j.script_type.clone(),
        None => msg.message_type.clone(),
    }
}

fn build_params(msg: &Message) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
    // Minimal mapping:
    // - "job.run" with exactly one embedded job: [{ "job": <job> }]
    // - otherwise: []
    if msg.message == "job.run"
        && let Some(j) = msg.job.first()
    {
        let jv = job_to_json(j)?;
        return Ok(json!([ { "job": jv } ]));
    }

    Ok(json!([]))
}
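
// Illustrative input/output for build_params (Job fields elided; "status.get"
// is just a placeholder method name, not one the supervisor is known to expose):
//
//     message = "job.run", job = [Job { id: 7, .. }]
//         => params = [ { "job": { "id": 7, ... } } ]
//     message = "status.get", job = []
//         => params = []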

fn job_to_json(job: &Job) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
    Ok(serde_json::to_value(job)?)
}

fn parse_message_key(s: &str) -> Option<(u32, u32)> {
    // Expect "message:{caller_id}:{id}"
    let mut it = s.split(':');
    match (it.next(), it.next(), it.next(), it.next()) {
        (Some("message"), Some(caller), Some(id), None) => {
            let caller_id = caller.parse::<u32>().ok()?;
            let msg_id = id.parse::<u32>().ok()?;
            Some((caller_id, msg_id))
        }
        _ => None,
    }
}
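
// Behavior as implemented above:
//
//     parse_message_key("message:12:34")   == Some((12, 34))
//     parse_message_key("message:12")      == None // too few segments
//     parse_message_key("message:12:34:x") == None // trailing segment rejected
//     parse_message_key("message:a:34")    == None // caller_id must parse as u32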

/// Auto-discover contexts periodically and ensure a router loop exists for each.
/// Returns a JoinHandle of the discovery task (router loops are detached).
pub fn start_router_auto(service: AppService, cfg: RouterConfig) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        let mut active: HashSet<u32> = HashSet::new();
        loop {
            match service.list_context_ids().await {
                Ok(ids) => {
                    for ctx_id in ids {
                        if !active.contains(&ctx_id) {
                            // Spawn a loop for this new context
                            let cfg_ctx = RouterConfig {
                                context_ids: vec![ctx_id],
                                ..cfg.clone()
                            };
                            let _ = start_router(service.clone(), cfg_ctx);
                            active.insert(ctx_id);
                            eprintln!("[router] started loop for context {}", ctx_id);
                        }
                    }
                }
                Err(e) => {
                    eprintln!("[router] list_context_ids error: {}", e);
                }
            }
            tokio::time::sleep(std::time::Duration::from_secs(5)).await;
        }
    })
}
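
// Typical wiring (sketch; constructing the AppService and the Tokio runtime
// are assumed to happen elsewhere in the crate):
//
//     let cfg = RouterConfig {
//         context_ids: vec![], // discovery fills these in per context
//         concurrency: 32,
//         base_url: "http://127.0.0.1:8990".to_string(),
//         topic: "supervisor.rpc".to_string(),
//     };
//     let _discovery = start_router_auto(service.clone(), cfg);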