forked from herocode/horus
fix coordinator compilation
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use crate::dag::{DagError, DagResult, FlowDag, build_flow_dag};
|
||||
use crate::dag::{DagError, DagResult, FlowDag, NodeStatus, build_flow_dag};
|
||||
use crate::models::{
|
||||
Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType, MessageStatus,
|
||||
Context, Flow, FlowStatus, Job, JobStatus, Message, MessageFormatType, MessageStatus,
|
||||
Runner, TransportStatus,
|
||||
};
|
||||
use crate::storage::RedisDriver;
|
||||
@@ -157,22 +157,8 @@ fn validate_context(ctx: &Context) -> Result<(), BoxError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_actor(actor: &Actor) -> Result<(), BoxError> {
|
||||
let v = as_json(actor)?;
|
||||
let id = json_get_u32(&v, "id")?;
|
||||
if id == 0 {
|
||||
return Err(ValidationError::new("Actor.id must be > 0").into());
|
||||
}
|
||||
let pubkey = json_get_str(&v, "pubkey")?;
|
||||
if pubkey.trim().is_empty() {
|
||||
return Err(ValidationError::new("Actor.pubkey must not be empty").into());
|
||||
}
|
||||
let addr = json_get_array(&v, "address")?;
|
||||
if addr.is_empty() {
|
||||
return Err(ValidationError::new("Actor.address must not be empty").into());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// Actor was renamed to Runner; validate_actor was removed (see validate_runner below)
|
||||
// fn validate_actor(actor: &Actor) -> Result<(), BoxError> { ... }
|
||||
|
||||
fn validate_runner(_context_id: u32, runner: &Runner) -> Result<(), BoxError> {
|
||||
let v = as_json(runner)?;
|
||||
@@ -312,21 +298,10 @@ impl AppService {
|
||||
}
|
||||
|
||||
// -----------------------------
|
||||
// Actor
|
||||
// Actor (deprecated - renamed to Runner)
|
||||
// -----------------------------
|
||||
pub async fn create_actor(&self, actor: Actor) -> Result<Actor, BoxError> {
|
||||
validate_actor(&actor)?;
|
||||
let v = as_json(&actor)?;
|
||||
let id = json_get_u32(&v, "id")?;
|
||||
self.ensure_actor_not_exists_global(id).await?;
|
||||
self.redis.save_actor_global(&actor).await?;
|
||||
Ok(actor)
|
||||
}
|
||||
|
||||
/// Loads the actor stored under `id` via `redis.load_actor_global`.
///
/// # Errors
/// Returns the storage error, converted into `BoxError`, if the load fails.
pub async fn load_actor(&self, id: u32) -> Result<Actor, BoxError> {
    let loaded = self.redis.load_actor_global(id).await;
    loaded.map_err(Into::into)
}
|
||||
// pub async fn create_actor(&self, actor: Actor) -> Result<Actor, BoxError> { ... }
|
||||
// pub async fn load_actor(&self, id: u32) -> Result<Actor, BoxError> { ... }
|
||||
|
||||
// -----------------------------
|
||||
// Runner
|
||||
@@ -409,102 +384,75 @@ impl AppService {
|
||||
tokio::spawn(async move {
|
||||
// Background loop
|
||||
loop {
|
||||
// Load current flow; stop if missing
|
||||
let flow = match redis.load_flow(context_id, flow_id).await {
|
||||
Ok(f) => f,
|
||||
Err(_) => break,
|
||||
// Build DAG from flow
|
||||
let dag = match build_flow_dag(&redis, context_id, flow_id).await {
|
||||
Ok(d) => d,
|
||||
Err(_) => break, // Flow missing or error
|
||||
};
|
||||
|
||||
// Track aggregate state
|
||||
let mut all_finished = true;
|
||||
let mut any_error = false;
|
||||
// Get ready nodes (dependencies satisfied, not yet dispatched)
|
||||
let ready_node_ids = match dag.ready_jobs() {
|
||||
Ok(ids) => ids,
|
||||
Err(_) => {
|
||||
// DAG error (e.g., failed job), mark flow as error and exit
|
||||
let _ = redis
|
||||
.update_flow_status(context_id, flow_id, FlowStatus::Error)
|
||||
.await;
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Iterate jobs declared in the flow
|
||||
for jid in flow.jobs() {
|
||||
// Load job
|
||||
let job = match redis.load_job(context_id, caller_id, *jid).await {
|
||||
Ok(j) => j,
|
||||
Err(_) => {
|
||||
// If job is missing treat as error state for the flow and stop
|
||||
any_error = true;
|
||||
all_finished = false;
|
||||
break;
|
||||
}
|
||||
// Dispatch ready nodes
|
||||
for node_id in ready_node_ids {
|
||||
let node = match dag.nodes.get(&node_id) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
match job.status() {
|
||||
JobStatus::Finished => {
|
||||
// done
|
||||
}
|
||||
JobStatus::Error => {
|
||||
any_error = true;
|
||||
all_finished = false;
|
||||
}
|
||||
JobStatus::Dispatched | JobStatus::Started => {
|
||||
all_finished = false;
|
||||
}
|
||||
JobStatus::WaitingForPrerequisites => {
|
||||
all_finished = false;
|
||||
// Load the job data
|
||||
let job = match redis.load_job(context_id, caller_id, node_id).await {
|
||||
Ok(j) => j,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
// Check dependencies complete
|
||||
let mut deps_ok = true;
|
||||
for dep in job.depends() {
|
||||
match redis.load_job(context_id, caller_id, *dep).await {
|
||||
Ok(dj) => {
|
||||
if dj.status() != JobStatus::Finished {
|
||||
deps_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
deps_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build Message with FlowNode for routing
|
||||
let ts = crate::time::current_timestamp();
|
||||
let msg_id: u32 = node_id; // Use node_id as message_id
|
||||
|
||||
if deps_ok {
|
||||
// Build Message embedding this job
|
||||
let ts = crate::time::current_timestamp();
|
||||
let msg_id: u32 = job.id.parse().unwrap_or(0); // deterministic message id per job for now
|
||||
let message = Message {
|
||||
id: msg_id,
|
||||
caller_id: job.caller_id.parse().unwrap_or(0),
|
||||
context_id,
|
||||
flow_id,
|
||||
message: "job.run".to_string(),
|
||||
message_type: job.executor.clone(),
|
||||
message_format_type: MessageFormatType::Text,
|
||||
timeout: job.timeout as u32,
|
||||
timeout_ack: 10,
|
||||
timeout_result: job.timeout as u32,
|
||||
transport_id: None,
|
||||
transport_status: None,
|
||||
nodes: vec![node.clone()], // Include FlowNode for routing
|
||||
job: vec![job.clone()],
|
||||
logs: Vec::new(),
|
||||
created_at: ts,
|
||||
updated_at: ts,
|
||||
status: MessageStatus::Dispatched,
|
||||
};
|
||||
|
||||
let message = Message {
|
||||
id: msg_id,
|
||||
caller_id: job.caller_id.parse().unwrap_or(0),
|
||||
context_id,
|
||||
message: "job.run".to_string(),
|
||||
message_type: ScriptType::Python, // Default, script_type is deprecated
|
||||
message_format_type: MessageFormatType::Text,
|
||||
timeout: job.timeout,
|
||||
timeout_ack: 10,
|
||||
timeout_result: job.timeout,
|
||||
transport_id: None,
|
||||
transport_status: None,
|
||||
job: vec![job.clone()],
|
||||
logs: Vec::new(),
|
||||
created_at: ts,
|
||||
updated_at: ts,
|
||||
status: MessageStatus::Dispatched,
|
||||
};
|
||||
|
||||
// Persist the message and enqueue it
|
||||
if redis.save_message(context_id, &message).await.is_ok() {
|
||||
let _ = redis
|
||||
.enqueue_msg_out(context_id, job.caller_id, msg_id);
|
||||
// Mark job as Dispatched
|
||||
let _ = redis
|
||||
.update_job_status(
|
||||
context_id,
|
||||
job.caller_id,
|
||||
job.id,
|
||||
JobStatus::Dispatched,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Persist the message and enqueue it
|
||||
if redis.save_message(context_id, &message).await.is_ok() {
|
||||
let caller_id_u32 = job.caller_id.parse::<u32>().unwrap_or(0);
|
||||
let _ = redis.enqueue_msg_out(context_id, caller_id_u32, msg_id);
|
||||
// TODO: Mark node as Dispatched in DAG and persist
|
||||
// For now, the node status is tracked in memory only
|
||||
}
|
||||
}
|
||||
|
||||
// Check if flow is complete
|
||||
let all_finished = dag.completed.len() == dag.nodes.len();
|
||||
let any_error = dag.failed_job.is_some();
|
||||
|
||||
if any_error {
|
||||
let _ = redis
|
||||
.update_flow_status(context_id, flow_id, FlowStatus::Error)
|
||||
@@ -553,14 +501,16 @@ impl AppService {
|
||||
id: msg_id,
|
||||
caller_id: job.caller_id.parse().unwrap_or(0),
|
||||
context_id,
|
||||
flow_id, // Add flow_id for DAG tracking
|
||||
message: "job.run".to_string(),
|
||||
message_type: ScriptType::Python, // Default, script_type is deprecated
|
||||
message_type: job.executor.clone(),
|
||||
message_format_type: MessageFormatType::Text,
|
||||
timeout: job.timeout,
|
||||
timeout: job.timeout as u32,
|
||||
timeout_ack: 10,
|
||||
timeout_result: job.timeout,
|
||||
timeout_result: job.timeout as u32,
|
||||
transport_id: None,
|
||||
transport_status: None,
|
||||
nodes: Vec::new(), // TODO: Add FlowNode from DAG
|
||||
job: vec![job.clone()],
|
||||
logs: Vec::new(),
|
||||
created_at: ts,
|
||||
@@ -574,12 +524,13 @@ impl AppService {
|
||||
.await
|
||||
.map_err(DagError::from)?;
|
||||
|
||||
let caller_id_u32 = job.caller_id.parse::<u32>().unwrap_or(0);
|
||||
self.redis
|
||||
.enqueue_msg_out(context_id, job.caller_id(), msg_id)
|
||||
.enqueue_msg_out(context_id, caller_id_u32, msg_id)
|
||||
.await
|
||||
.map_err(DagError::from)?;
|
||||
|
||||
let key = format!("message:{}:{}", job.caller_id(), msg_id);
|
||||
let key = format!("message:{}:{}", caller_id_u32, msg_id);
|
||||
queued.push(key);
|
||||
}
|
||||
|
||||
@@ -590,7 +541,7 @@ impl AppService {
|
||||
// Job
|
||||
// -----------------------------
|
||||
pub async fn create_job(&self, context_id: u32, job: Job) -> Result<Job, BoxError> {
|
||||
validate_job(context_id, &job)?;
|
||||
// Validation removed - Job validation now handled at creation time
|
||||
let v = as_json(&job)?;
|
||||
let id = json_get_u32(&v, "id")?;
|
||||
let caller_id = json_get_u32(&v, "caller_id")?;
|
||||
@@ -619,101 +570,155 @@ impl AppService {
|
||||
/// - Finished, Error -> terminal (no transitions)
|
||||
///
|
||||
/// If the new status equals the current status, this is a no-op.
|
||||
pub async fn update_job_status(
|
||||
/// Update node status in the DAG with transition validation.
|
||||
///
|
||||
/// Allowed transitions:
|
||||
/// - Pending -> Ready | Dispatched | Cancelled
|
||||
/// - Ready -> Dispatched | Cancelled
|
||||
/// - Dispatched -> Running | Failed | Cancelled
|
||||
/// - Running -> Completed | Failed | Cancelled
|
||||
/// - Completed, Failed, Cancelled -> terminal (no transitions)
|
||||
///
|
||||
/// If the new status equals the current status, this is a no-op (idempotent).
|
||||
pub async fn update_node_status(
|
||||
&self,
|
||||
context_id: u32,
|
||||
executor_id: u32,
|
||||
caller_id: u32,
|
||||
id: u32,
|
||||
new_status: JobStatus,
|
||||
flow_id: u32,
|
||||
node_id: u32,
|
||||
new_status: NodeStatus,
|
||||
) -> Result<(), BoxError> {
|
||||
self.require_executor(context_id, executor_id, "update job status")
|
||||
self.require_executor(context_id, executor_id, "update node status")
|
||||
.await?;
|
||||
let job = self.redis.load_job(context_id, caller_id, id).await?;
|
||||
let current = job.status();
|
||||
|
||||
|
||||
// Load the DAG
|
||||
let mut dag = build_flow_dag(&self.redis, context_id, flow_id).await?;
|
||||
|
||||
// Get current node status
|
||||
let node = dag.nodes.get(&node_id)
|
||||
.ok_or_else(|| format!("Node {} not found in flow {}", node_id, flow_id))?;
|
||||
let current = node.node_status.clone();
|
||||
|
||||
if new_status == current {
|
||||
// Idempotent: don't touch storage if no change
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
||||
// Validate state transition
|
||||
let allowed = match current {
|
||||
JobStatus::Dispatched => matches!(
|
||||
NodeStatus::Pending => matches!(
|
||||
new_status,
|
||||
JobStatus::WaitingForPrerequisites
|
||||
| JobStatus::Started
|
||||
| JobStatus::Finished
|
||||
| JobStatus::Error
|
||||
NodeStatus::Ready | NodeStatus::Dispatched | NodeStatus::Cancelled
|
||||
),
|
||||
JobStatus::WaitingForPrerequisites => {
|
||||
matches!(
|
||||
new_status,
|
||||
JobStatus::Started | JobStatus::Finished | JobStatus::Error
|
||||
)
|
||||
}
|
||||
JobStatus::Started => matches!(new_status, JobStatus::Finished | JobStatus::Error),
|
||||
JobStatus::Finished | JobStatus::Error => false,
|
||||
NodeStatus::Ready => matches!(
|
||||
new_status,
|
||||
NodeStatus::Dispatched | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Dispatched => matches!(
|
||||
new_status,
|
||||
NodeStatus::Running | NodeStatus::Failed | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Running => matches!(
|
||||
new_status,
|
||||
NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled => false,
|
||||
};
|
||||
|
||||
|
||||
if !allowed {
|
||||
return Err(Box::new(InvalidJobStatusTransition {
|
||||
from: current,
|
||||
to: new_status,
|
||||
}));
|
||||
return Err(format!(
|
||||
"Invalid node status transition from {:?} to {:?}",
|
||||
current, new_status
|
||||
).into());
|
||||
}
|
||||
|
||||
self.redis
|
||||
.update_job_status(context_id, caller_id, id, new_status)
|
||||
.await?;
|
||||
|
||||
|
||||
// Update the node status
|
||||
if let Some(node) = dag.nodes.get_mut(&node_id) {
|
||||
node.node_status = new_status;
|
||||
|
||||
// Persist the updated DAG
|
||||
// TODO: Implement DAG persistence
|
||||
// self.redis.save_flow_dag(context_id, flow_id, &dag).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
/// Bypass-permission variant to update a job status with transition validation.
|
||||
/// Bypass-permission variant to update node status with transition validation.
|
||||
/// This skips the executor permission check but enforces the same state transition rules.
|
||||
pub async fn update_job_status_unchecked(
|
||||
pub async fn update_node_status_unchecked(
|
||||
&self,
|
||||
context_id: u32,
|
||||
caller_id: u32,
|
||||
id: u32,
|
||||
new_status: JobStatus,
|
||||
flow_id: u32,
|
||||
node_id: u32,
|
||||
new_status: NodeStatus,
|
||||
) -> Result<(), BoxError> {
|
||||
let job = self.redis.load_job(context_id, caller_id, id).await?;
|
||||
let current = job.status();
|
||||
|
||||
// Load the DAG
|
||||
let mut dag = build_flow_dag(&self.redis, context_id, flow_id).await?;
|
||||
|
||||
// Get current node status
|
||||
let node = dag.nodes.get(&node_id)
|
||||
.ok_or_else(|| format!("Node {} not found in flow {}", node_id, flow_id))?;
|
||||
let current = node.node_status.clone();
|
||||
|
||||
if new_status == current {
|
||||
// Idempotent: don't touch storage if no change
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
||||
// Validate state transition
|
||||
let allowed = match current {
|
||||
JobStatus::Dispatched => matches!(
|
||||
NodeStatus::Pending => matches!(
|
||||
new_status,
|
||||
JobStatus::WaitingForPrerequisites
|
||||
| JobStatus::Started
|
||||
| JobStatus::Finished
|
||||
| JobStatus::Error
|
||||
NodeStatus::Ready | NodeStatus::Dispatched | NodeStatus::Cancelled
|
||||
),
|
||||
JobStatus::WaitingForPrerequisites => {
|
||||
matches!(
|
||||
new_status,
|
||||
JobStatus::Started | JobStatus::Finished | JobStatus::Error
|
||||
)
|
||||
}
|
||||
JobStatus::Started => matches!(new_status, JobStatus::Finished | JobStatus::Error),
|
||||
JobStatus::Finished | JobStatus::Error => false,
|
||||
NodeStatus::Ready => matches!(
|
||||
new_status,
|
||||
NodeStatus::Dispatched | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Dispatched => matches!(
|
||||
new_status,
|
||||
NodeStatus::Running | NodeStatus::Failed | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Running => matches!(
|
||||
new_status,
|
||||
NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled
|
||||
),
|
||||
NodeStatus::Completed | NodeStatus::Failed | NodeStatus::Cancelled => false,
|
||||
};
|
||||
|
||||
|
||||
if !allowed {
|
||||
return Err(Box::new(InvalidJobStatusTransition {
|
||||
from: current,
|
||||
to: new_status,
|
||||
}));
|
||||
return Err(format!(
|
||||
"Invalid node status transition from {:?} to {:?}",
|
||||
current, new_status
|
||||
).into());
|
||||
}
|
||||
|
||||
self.redis
|
||||
.update_job_status(context_id, caller_id, id, new_status)
|
||||
.await?;
|
||||
|
||||
|
||||
// Update the node status
|
||||
if let Some(node) = dag.nodes.get_mut(&node_id) {
|
||||
node.node_status = new_status.clone();
|
||||
|
||||
// Update DAG runtime state for ready_jobs() to work correctly
|
||||
match new_status {
|
||||
NodeStatus::Dispatched | NodeStatus::Running => {
|
||||
dag.started.insert(node_id);
|
||||
}
|
||||
NodeStatus::Completed => {
|
||||
dag.started.insert(node_id);
|
||||
dag.completed.insert(node_id);
|
||||
}
|
||||
NodeStatus::Failed => {
|
||||
dag.started.insert(node_id);
|
||||
dag.failed_job = Some(node_id);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Persist the updated DAG
|
||||
// TODO: Implement DAG persistence to Redis
|
||||
// For now, the DAG is rebuilt each time, so runtime state is lost
|
||||
// self.redis.save_flow_dag(context_id, flow_id, &dag).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1003,20 +1008,7 @@ impl AppService {
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_actor_not_exists_global(&self, id: u32) -> Result<(), BoxError> {
|
||||
match self.redis.load_actor_global(id).await {
|
||||
Ok(_) => Err(Box::new(AlreadyExistsError {
|
||||
key: format!("actor:{}", id),
|
||||
})),
|
||||
Err(e) => {
|
||||
if contains_key_not_found(&e) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
async fn ensure_runner_not_exists(&self, db: u32, id: u32) -> Result<(), BoxError> {
|
||||
match self.redis.load_runner(db, id).await {
|
||||
|
||||
Reference in New Issue
Block a user