Compare commits

...

13 Commits

Author SHA1 Message Date
Lee Smet
c4971aa794 Add full flow script example
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:17:12 +02:00
Lee Smet
7aa35b6d06 Fix remainder of HSET return value deconding
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:16:53 +02:00
Lee Smet
60946af1df Fix pushMessage parameter encoding
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:11:10 +02:00
Lee Smet
83990cf16a Properly encode topic in mycelium rpc
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 20:09:47 +02:00
Lee Smet
dbb9493bcb Improve code format in router
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:54:11 +02:00
Lee Smet
d921dca75c Fix default mycelium jsonrpc api port
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:50:45 +02:00
Lee Smet
4a15269442 Fix more HSET types in redis driver
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 14:46:55 +02:00
Lee Smet
43fd61d662 Remove unused imports
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-09-03 11:29:26 +02:00
Lee Smet
38709e06f3 Add script to test actor/context/job/flow create and flow dag
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 15:43:32 +02:00
Lee Smet
08de312cd9 Fix HSET response decoding
The command internally uses (the deprecated) HMSET which just returns OK
on success instead of the amount of fields written

Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:30:41 +02:00
Lee Smet
4d1cd3d910 Format codebase
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:22:42 +02:00
Lee Smet
c1c1ae3bd1 Bump thiserror to latest version
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:22:12 +02:00
Lee Smet
ec339c5cbe Add some internal logging
Signed-off-by: Lee Smet <lee.smet@hotmail.com>
2025-08-29 11:10:04 +02:00
15 changed files with 1178 additions and 192 deletions

195
Cargo.lock generated
View File

@@ -17,6 +17,15 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.20"
@@ -514,9 +523,10 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"thiserror 1.0.69",
"thiserror",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
@@ -846,7 +856,7 @@ dependencies = [
"rustc-hash",
"serde",
"serde_json",
"thiserror 2.0.16",
"thiserror",
"tokio",
"tower",
"tracing",
@@ -884,7 +894,7 @@ dependencies = [
"serde",
"serde_json",
"soketto",
"thiserror 2.0.16",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
@@ -901,9 +911,15 @@ dependencies = [
"http",
"serde",
"serde_json",
"thiserror 2.0.16",
"thiserror",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.175"
@@ -944,6 +960,15 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "memchr"
version = "2.7.5"
@@ -993,6 +1018,16 @@ dependencies = [
"tempfile",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
@@ -1086,6 +1121,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.4"
@@ -1203,7 +1244,7 @@ dependencies = [
"rustc-hash",
"rustls",
"socket2 0.5.10",
"thiserror 2.0.16",
"thiserror",
"tokio",
"tracing",
"web-time",
@@ -1224,7 +1265,7 @@ dependencies = [
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.16",
"thiserror",
"tinyvec",
"tracing",
"web-time",
@@ -1352,6 +1393,50 @@ dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.10",
"regex-syntax 0.8.6",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.6",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "reqwest"
version = "0.12.23"
@@ -1587,6 +1672,15 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
@@ -1733,33 +1827,13 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]]
name = "thiserror"
version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
dependencies = [
"thiserror-impl 2.0.16",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
"thiserror-impl",
]
[[package]]
@@ -1773,6 +1847,15 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
"cfg-if",
]
[[package]]
name = "tinystr"
version = "0.8.1"
@@ -1966,6 +2049,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
@@ -2015,6 +2128,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "valuable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "vcpkg"
version = "0.2.15"
@@ -2151,6 +2270,28 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-link"
version = "0.1.3"

View File

@@ -21,4 +21,5 @@ reqwest = { version = "0.12.7", features = ["json", "rustls-tls"] }
# Base64 encoding for message payloads
base64 = "0.22.1"
# Error derive for clean error types
thiserror = "1.0.64"
thiserror = "2.0.16"
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "fmt"] }

361
scripts/jsonrpc_demo.py Normal file
View File

@@ -0,0 +1,361 @@
#!/usr/bin/env python3
"""
Demo script for HeroCoordinator JSON-RPC API.
- Creates an actor
- Verifies by loading the actor
- Creates a context with the actor as admin/reader/executor
- Creates three jobs with dependencies
- Creates a flow referencing those jobs
- Fetches and prints the flow DAG
Usage:
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/jsonrpc_demo.py
Defaults to http://127.0.0.1:9652 if COORDINATOR_URL is not set.
"""
import os
import json
import sys
from urllib import request, error
from typing import Any, Dict, List, Tuple
JSONRPC_VERSION = "2.0"
class JsonRpcClient:
def __init__(self, url: str):
self.url = url.rstrip("/")
self._id = 0
def call(self, method: str, params: Dict[str, Any]) -> Any:
self._id += 1
payload = {
"jsonrpc": JSONRPC_VERSION,
"id": self._id,
"method": method,
"params": params,
}
data = json.dumps(payload).encode("utf-8")
req = request.Request(self.url, data=data, headers={"Content-Type": "application/json"})
try:
with request.urlopen(req) as resp:
body = resp.read()
except error.HTTPError as e:
try:
details = e.read().decode("utf-8", "ignore")
except Exception:
details = ""
raise RuntimeError(f"HTTP error {e.code}: {details}") from e
except error.URLError as e:
raise RuntimeError(f"URL error: {e.reason}") from e
try:
obj = json.loads(body.decode("utf-8"))
except Exception as e:
raise RuntimeError(f"Invalid JSON response: {body!r}") from e
# JSON-RPC single response expected
if isinstance(obj, list):
raise RuntimeError("Batch responses are not supported in this demo")
if obj.get("error"):
raise RuntimeError(f"RPC error: {json.dumps(obj['error'])}")
return obj.get("result")
def print_header(title: str):
print("\n" + "=" * 80)
print(title)
print("=" * 80)
def pretty_print(obj: Any):
print(json.dumps(obj, indent=2, sort_keys=True))
def summarize_dag(dag: Dict[str, Any]):
print_header("Flow DAG Summary")
flow_id = dag.get("flow_id")
caller_id = dag.get("caller_id")
context_id = dag.get("context_id")
print(f"flow_id={flow_id} caller_id={caller_id} context_id={context_id}")
edges: List[Tuple[int, int]] = dag.get("edges", [])
roots: List[int] = dag.get("roots", [])
leaves: List[int] = dag.get("leaves", [])
levels: List[List[int]] = dag.get("levels", [])
nodes: Dict[str, Any] = dag.get("nodes", {})
print("Edges:")
for a, b in edges:
print(f" {a} -> {b}")
print(f"Roots: {roots}")
print(f"Leaves: {leaves}")
print("Levels:")
for i, lvl in enumerate(levels):
print(f" L{i}: {lvl}")
# Show nodes and their dependencies (from JobSummary)
print("Nodes:")
for k, v in nodes.items():
depends = v.get("depends", [])
prerequisites = v.get("prerequisites", [])
stype = v.get("script_type")
print(f" Job {k}: depends={depends} prerequisites={prerequisites} script_type={stype}")
def assert_edges(edges: List[Tuple[int, int]], required: List[Tuple[int, int]]):
edge_set = {(int(a), int(b)) for a, b in edges}
missing = [e for e in required if e not in edge_set]
if missing:
raise AssertionError(f"Missing expected edges in DAG: {missing}; got={sorted(edge_set)}")
def main():
url = os.getenv("COORDINATOR_URL", "http://127.0.0.1:9652")
client = JsonRpcClient(url)
# Deterministic demo IDs; change if collisions happen
actor_id = 1001
context_id = 1 # Redis DB indices are 0-15; keep <= 15
job_a = 3001
job_b = 3002
job_c = 3003
job_d = 3004
job_e = 3005
job_f = 3006
job_g = 3007
job_h = 3008
job_i = 3009
flow_id = 4001
print_header("actor.create")
actor = client.call("actor.create", {
"actor": {
"id": actor_id,
"pubkey": "demo-pubkey",
"address": ["127.0.0.1"]
}
})
pretty_print(actor)
print_header("actor.load")
actor_loaded = client.call("actor.load", {"id": actor_id})
pretty_print(actor_loaded)
print_header("context.create")
context = client.call("context.create", {
"context": {
"id": context_id,
"admins": [actor_id],
"readers": [actor_id],
"executors": [actor_id]
}
})
pretty_print(context)
print_header("job.create - A (root)")
jobA = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_a,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('A')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": []
}
})
pretty_print(jobA)
print_header("job.create - B (root)")
jobB = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_b,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('B')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": []
}
})
pretty_print(jobB)
print_header("job.create - C (depends on A and B)")
jobC = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_c,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('C')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_a, job_b]
}
})
pretty_print(jobC)
print_header("job.create - D (depends on A)")
jobD = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_d,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('D')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_a]
}
})
pretty_print(jobD)
print_header("job.create - E (depends on B)")
jobE = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_e,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('E')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_b]
}
})
pretty_print(jobE)
print_header("job.create - F (depends on C and D)")
jobF = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_f,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('F')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_c, job_d]
}
})
pretty_print(jobF)
print_header("job.create - G (depends on C and E)")
jobG = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_g,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('G')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_c, job_e]
}
})
pretty_print(jobG)
print_header("job.create - H (leaf; depends on F and G)")
jobH = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_h,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('H')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_f, job_g]
}
})
pretty_print(jobH)
print_header("job.create - I (leaf; depends on F and G)")
jobI = client.call("job.create", {
"context_id": context_id,
"job": {
"id": job_i,
"caller_id": actor_id,
"context_id": context_id,
"script": "print('I')",
"script_type": "Python",
"timeout": 30,
"retries": 0,
"env_vars": {},
"prerequisites": [],
"depends": [job_f, job_g]
}
})
pretty_print(jobI)
print_header("flow.create")
flow = client.call("flow.create", {
"context_id": context_id,
"flow": {
"id": flow_id,
"caller_id": actor_id,
"context_id": context_id,
"jobs": [job_a, job_b, job_c, job_d, job_e, job_f, job_g, job_h, job_i],
"env_vars": {}
}
})
pretty_print(flow)
print_header("flow.dag")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
summarize_dag(dag)
# Validate roots and leaves
got_roots = list(map(int, dag.get("roots", [])))
if got_roots != sorted([job_a, job_b]):
print("WARNING: Unexpected roots:", got_roots, file=sys.stderr)
got_leaves = {int(x) for x in dag.get("leaves", [])}
expected_leaves = {job_h, job_i}
if got_leaves != expected_leaves:
print("WARNING: Unexpected leaves:", got_leaves, "expected:", expected_leaves, file=sys.stderr)
# Check edges reflect the expanded DAG
expected_edges = [
(job_a, job_c), (job_b, job_c),
(job_a, job_d), (job_b, job_e),
(job_c, job_f), (job_d, job_f),
(job_c, job_g), (job_e, job_g),
(job_f, job_h), (job_g, job_h),
(job_f, job_i), (job_g, job_i),
]
try:
assert_edges(dag.get("edges", []), expected_edges)
print("DAG edges contain expected dependencies:", expected_edges)
except AssertionError as e:
print("WARNING:", e, file=sys.stderr)
if __name__ == "__main__":
try:
main()
except Exception as e:
print_header("Error")
print(str(e))
sys.exit(1)

View File

@@ -0,0 +1,349 @@
#!/usr/bin/env python3
"""
Supervisor flow demo for HeroCoordinator.
This script:
- Creates an actor
- Creates a context granting the actor admin/reader/executor privileges
- Registers a Runner in the context targeting a Supervisor reachable via Mycelium (by public key or IP)
- Creates simple Python jobs (text jobs) with a small dependency chain
- Creates a flow referencing those jobs
- Starts the flow and polls until it finishes (or errors)
Transport: JSON-RPC over HTTP to the Coordinator (default COORDINATOR_URL=http://127.0.0.1:9652).
Example usage:
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-ip 2001:db8::1
COORDINATOR_URL=http://127.0.0.1:9652 python3 scripts/supervisor_flow_demo.py --dst-pk bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32
Notes:
- Exactly one of --dst-ip or --dst-pk must be provided.
- Runner.topic defaults to "supervisor.rpc" (see main.rs).
- The router auto-discovers contexts and will deliver job.run messages to the supervisor.
"""
import argparse
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional, Tuple
from urllib import request, error
JSONRPC_VERSION = "2.0"
def env_url() -> str:
return os.getenv("COORDINATOR_URL", "http://127.0.0.1:9652").rstrip("/")
class JsonRpcClient:
def __init__(self, url: str):
self.url = url
self._id = 0
def call(self, method: str, params: Dict[str, Any]) -> Any:
self._id += 1
payload = {
"jsonrpc": JSONRPC_VERSION,
"id": self._id,
"method": method,
"params": params,
}
data = json.dumps(payload).encode("utf-8")
req = request.Request(self.url, data=data, headers={"Content-Type": "application/json"})
try:
with request.urlopen(req) as resp:
body = resp.read()
except error.HTTPError as e:
try:
details = e.read().decode("utf-8", "ignore")
except Exception:
details = ""
raise RuntimeError(f"HTTP error {e.code}: {details}") from e
except error.URLError as e:
raise RuntimeError(f"URL error: {e.reason}") from e
try:
obj = json.loads(body.decode("utf-8"))
except Exception as e:
raise RuntimeError(f"Invalid JSON response: {body!r}") from e
if isinstance(obj, list):
raise RuntimeError("Batch responses are not supported")
if obj.get("error"):
raise RuntimeError(f"RPC error: {json.dumps(obj['error'])}")
return obj.get("result")
def print_header(title: str):
print("\n" + "=" * 80)
print(title)
print("=" * 80)
def pretty(obj: Any):
print(json.dumps(obj, indent=2, sort_keys=True))
def try_create_or_load(client: JsonRpcClient, create_method: str, create_params: Dict[str, Any],
load_method: str, load_params: Dict[str, Any]) -> Any:
"""Attempt a create; if it fails due to existence, try load."""
try:
return client.call(create_method, create_params)
except RuntimeError as e:
msg = str(e)
# Server maps AlreadyExists to StorageError, we don't have a structured error code here.
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
# Fall back to load
return client.call(load_method, load_params)
raise
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Create actor/context/runner/jobs/flow; start and wait until completion.")
group = p.add_mutually_exclusive_group(required=True)
group.add_argument("--dst-ip", help="Supervisor Mycelium IP address (IPv4 or IPv6)")
group.add_argument("--dst-pk", help="Supervisor public key (64-hex)")
p.add_argument("--context-id", type=int, default=2, help="Context id (Redis DB index; 0-15). Default: 2")
p.add_argument("--actor-id", type=int, default=11001, help="Actor id. Default: 11001")
p.add_argument("--runner-id", type=int, default=12001, help="Runner id. Default: 12001")
p.add_argument("--flow-id", type=int, default=13001, help="Flow id. Default: 13001")
p.add_argument("--base-job-id", type=int, default=20000, help="Base job id for first job; subsequent jobs increment. Default: 20000")
p.add_argument("--jobs", type=int, default=3, help="Number of jobs to create (>=1). Forms a simple chain. Default: 3")
p.add_argument("--timeout-secs", type=int, default=60, help="Per-job timeout seconds. Default: 60")
p.add_argument("--retries", type=int, default=0, help="Per-job retries (0-255). Default: 0")
p.add_argument(
"--script-type",
choices=["Python", "V", "Osis", "Sal"],
default="Python",
help="ScriptType for jobs/runner. Default: Python"
)
p.add_argument("--topic", default="supervisor.rpc", help="Supervisor topic. Default: supervisor.rpc")
p.add_argument("--poll-interval", type=float, default=2.0, help="Flow poll interval seconds. Default: 2.0")
p.add_argument("--poll-timeout", type=int, default=600, help="Max seconds to wait for flow completion. Default: 600")
return p.parse_args()
def main():
args = parse_args()
if args.jobs < 1:
print("ERROR: --jobs must be >= 1", file=sys.stderr)
sys.exit(2)
url = env_url()
client = JsonRpcClient(url)
actor_id = int(args.actor_id)
context_id = int(args.context_id)
runner_id = int(args.runner_id)
flow_id = int(args.flow_id)
base_job_id = int(args.base_job_id)
script_type = args.script_type
timeout = int(args.timeout_secs)
retries = int(args.retries)
topic = args.topic
# 1) Actor
print_header("actor.create (or load)")
actor = try_create_or_load(
client,
"actor.create",
{
"actor": {
"id": actor_id,
"pubkey": "demo-pubkey",
"address": ["127.0.0.1"],
}
},
"actor.load",
{"id": actor_id},
)
pretty(actor)
# 2) Context
print_header("context.create (or load)")
context = try_create_or_load(
client,
"context.create",
{
"context": {
"id": context_id,
"admins": [actor_id],
"readers": [actor_id],
"executors": [actor_id],
}
},
"context.load",
{"id": context_id},
)
pretty(context)
# 3) Runner in this context
# Router picks pubkey if non-empty, else IP address.
# However, RunnerCreate requires both fields; we fill both and control routing via pubkey empty/non-empty.
runner_pubkey = args.dst_pk if args.dst_pk else ""
runner_address = args.dst_ip if args.dst_ip else "127.0.0.1"
print_header("runner.create (or load)")
# runner.load requires both context_id and id
try:
runner = client.call("runner.create", {
"context_id": context_id,
"runner": {
"id": runner_id,
"pubkey": runner_pubkey,
"address": runner_address,
"topic": topic,
"script_type": script_type,
"local": False
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
runner = client.call("runner.load", {"context_id": context_id, "id": runner_id})
else:
raise
pretty(runner)
# 4) Jobs
# Build a simple chain: J0 (root), J1 depends on J0, J2 depends on J1, ... up to N-1
job_ids: List[int] = []
for i in range(args.jobs):
jid = base_job_id + i
depends = [] if i == 0 else [base_job_id + (i - 1)]
job_payload = {
"id": jid,
"caller_id": actor_id,
"context_id": context_id,
"script": f"print('Job {i} running')",
"script_type": script_type,
"timeout": timeout,
"retries": retries,
"env_vars": {},
"prerequisites": [],
"depends": depends,
}
print_header(f"job.create - {jid} {'(root)' if not depends else f'(depends on {depends})'}")
try:
job = client.call("job.create", {
"context_id": context_id,
"job": job_payload
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
job = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
else:
raise
pretty(job)
job_ids.append(jid)
# 5) Flow
print_header("flow.create (or load)")
try:
flow = client.call("flow.create", {
"context_id": context_id,
"flow": {
"id": flow_id,
"caller_id": actor_id,
"context_id": context_id,
"jobs": job_ids,
"env_vars": {}
}
})
except RuntimeError as e:
msg = str(e)
if "Already exists" in msg or "Storage Error" in msg or "Invalid params" in msg:
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
else:
raise
pretty(flow)
# Optional: show DAG
try:
print_header("flow.dag")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: flow.dag failed: {e}", file=sys.stderr)
# 6) Start flow (idempotent; returns bool whether scheduler started)
print_header("flow.start")
started = client.call("flow.start", {"context_id": context_id, "id": flow_id})
print(f"flow.start -> {started}")
# 7) Poll until Finished or Error (or timeout)
print_header("Polling flow.load until completion")
t0 = time.time()
status = None
last_status_print = 0.0
poll_count = 0
while True:
poll_count += 1
flow = client.call("flow.load", {"context_id": context_id, "id": flow_id})
status = flow.get("status")
now = time.time()
if now - last_status_print >= max(1.0, float(args.poll_interval)):
print(f"[{int(now - t0)}s] flow.status = {status}")
last_status_print = now
# Every 5th poll, print the current flow DAG
if (poll_count % 5) == 0:
try:
print_header("flow.dag (periodic)")
dag = client.call("flow.dag", {"context_id": context_id, "id": flow_id})
pretty(dag)
except Exception as e:
print(f"WARN: periodic flow.dag failed: {e}", file=sys.stderr)
if status in ("Finished", "Error"):
break
if (now - t0) > args.poll_timeout:
print(f"ERROR: Flow did not complete within {args.poll_timeout}s (status={status})", file=sys.stderr)
break
time.sleep(float(args.poll_interval))
# 8) Final summary: job statuses
print_header("Final job statuses")
for jid in job_ids:
try:
j = client.call("job.load", {
"context_id": context_id,
"caller_id": actor_id,
"id": jid
})
print(f"Job {jid}: status={j.get('status')} result={j.get('result')}")
except Exception as e:
print(f"Job {jid}: load failed: {e}", file=sys.stderr)
# Exit code
if status == "Finished":
print_header("Result")
print("Flow finished successfully.")
sys.exit(0)
else:
print_header("Result")
print(f"Flow ended with status={status}")
sys.exit(1)
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print("\nInterrupted.")
sys.exit(130)
except Exception as e:
print_header("Error")
print(str(e))
sys.exit(1)

View File

@@ -1,13 +1,7 @@
pub mod supervisor_client;
pub mod mycelium_client;
pub mod supervisor_client;
pub mod types;
pub use mycelium_client::{MyceliumClient, MyceliumClientError};
pub use supervisor_client::{SupervisorClient, SupervisorClientError};
pub use types::Destination;
pub use supervisor_client::{
SupervisorClient,
SupervisorClientError,
};
pub use mycelium_client::{
MyceliumClient,
MyceliumClientError,
};

View File

@@ -6,13 +6,13 @@ use reqwest::Client as HttpClient;
use serde_json::{Value, json};
use thiserror::Error;
use crate::models::TransportStatus;
use crate::clients::Destination;
use crate::models::TransportStatus;
/// Lightweight client for Mycelium JSON-RPC (send + query status)
#[derive(Clone)]
pub struct MyceliumClient {
base_url: String, // e.g. http://127.0.0.1:8990
base_url: String, // e.g. http://127.0.0.1:8990
http: HttpClient,
id_counter: Arc<AtomicU64>,
}
@@ -58,20 +58,30 @@ impl MyceliumClient {
let body: Value = resp.json().await?;
if let Some(err) = body.get("error") {
let code = err.get("code").and_then(|v| v.as_i64()).unwrap_or(0);
let msg = err.get("message").and_then(|v| v.as_str()).unwrap_or("unknown error");
let msg = err
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("unknown error");
if code == 408 {
return Err(MyceliumClientError::TransportTimeout);
}
return Err(MyceliumClientError::RpcError(format!("code={code} msg={msg}")));
return Err(MyceliumClientError::RpcError(format!(
"code={code} msg={msg}"
)));
}
if !status.is_success() {
return Err(MyceliumClientError::RpcError(format!("HTTP {status}, body {body}")));
return Err(MyceliumClientError::RpcError(format!(
"HTTP {status}, body {body}"
)));
}
Ok(body)
}
/// Call messageStatus with an outbound message id (hex string)
pub async fn message_status(&self, id_hex: &str) -> Result<TransportStatus, MyceliumClientError> {
pub async fn message_status(
&self,
id_hex: &str,
) -> Result<TransportStatus, MyceliumClientError> {
let params = json!({ "id": id_hex });
let body = self.jsonrpc("messageStatus", params).await?;
let result = body.get("result").ok_or_else(|| {
@@ -83,7 +93,9 @@ impl MyceliumClient {
} else if let Some(s) = result.as_str() {
s.to_string()
} else {
return Err(MyceliumClientError::InvalidResponse(format!("unexpected result shape: {result}")));
return Err(MyceliumClientError::InvalidResponse(format!(
"unexpected result shape: {result}"
)));
};
Self::map_status(&status_str).ok_or_else(|| {
MyceliumClientError::InvalidResponse(format!("unknown status: {status_str}"))
@@ -113,16 +125,15 @@ impl MyceliumClient {
Destination::Ip(ip) => json!({ "ip": ip.to_string() }),
Destination::Pk(pk) => json!({ "pk": pk }),
};
let message = json!({
let mut message = json!({
"dst": dst_v,
"topic": topic,
"payload": payload_b64,
});
let mut params = json!({ "message": message });
if let Some(rt) = reply_timeout {
params["reply_timeout"] = json!(rt);
message["reply_timeout"] = json!(rt);
}
params
message
}
/// pushMessage: send a message with dst/topic/payload. Optional reply_timeout for sync replies.
@@ -143,7 +154,10 @@ impl MyceliumClient {
/// Helper to extract outbound message id from pushMessage result (InboundMessage or PushMessageResponseId)
pub fn extract_message_id_from_result(result: &Value) -> Option<String> {
result.get("id").and_then(|v| v.as_str()).map(|s| s.to_string())
result
.get("id")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}
}
@@ -162,24 +176,39 @@ mod tests {
Some(10),
);
let msg1 = p1.get("message").unwrap();
assert_eq!(msg1.get("topic").unwrap().as_str().unwrap(), "supervisor.rpc");
assert_eq!(
msg1.get("topic").unwrap().as_str().unwrap(),
"supervisor.rpc"
);
assert_eq!(msg1.get("payload").unwrap().as_str().unwrap(), "Zm9vYmFy");
assert_eq!(
msg1.get("dst").unwrap().get("ip").unwrap().as_str().unwrap(),
msg1.get("dst")
.unwrap()
.get("ip")
.unwrap()
.as_str()
.unwrap(),
"2001:db8::1"
);
assert_eq!(p1.get("reply_timeout").unwrap().as_u64().unwrap(), 10);
// PK destination without timeout
let p2 = MyceliumClient::build_push_params(
&Destination::Pk("bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".into()),
&Destination::Pk(
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32".into(),
),
"supervisor.rpc",
"YmF6", // "baz"
None,
);
let msg2 = p2.get("message").unwrap();
assert_eq!(
msg2.get("dst").unwrap().get("pk").unwrap().as_str().unwrap(),
msg2.get("dst")
.unwrap()
.get("pk")
.unwrap()
.as_str()
.unwrap(),
"bb39b4a3a4efd70f3e05e37887677e02efbda14681d0acd3882bc0f754792c32"
);
assert!(p2.get("reply_timeout").is_none());
@@ -205,4 +234,4 @@ mod tests {
"fedcba9876543210"
);
}
}
}

View File

@@ -104,6 +104,10 @@ impl SupervisorClient {
Ok(BASE64_STANDARD.encode(s.as_bytes()))
}
fn encode_topic(topic: &[u8]) -> String {
BASE64_STANDARD.encode(topic)
}
fn extract_message_id_from_result(result: &Value) -> Option<String> {
// Two possibilities per Mycelium spec oneOf:
// - PushMessageResponseId: { "id": "0123456789abcdef" }
@@ -120,7 +124,12 @@ impl SupervisorClient {
let payload_b64 = Self::encode_payload(&inner)?;
let result = self
.mycelium
.push_message(&self.destination, &self.topic, &payload_b64, None)
.push_message(
&self.destination,
&Self::encode_topic(self.topic.as_bytes()),
&payload_b64,
None,
)
.await?;
if let Some(id) = MyceliumClient::extract_message_id_from_result(&result) {
@@ -151,7 +160,12 @@ impl SupervisorClient {
let result = self
.mycelium
.push_message(&self.destination, &self.topic, &payload_b64, Some(reply_timeout_secs))
.push_message(
&self.destination,
&self.topic,
&payload_b64,
Some(reply_timeout_secs),
)
.await?;
// Expect an InboundMessage-like with a base64 payload containing the supervisor JSON-RPC response
@@ -163,7 +177,11 @@ impl SupervisorClient {
one.get("payload")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| SupervisorClientError::InvalidResponse(format!("missing payload in result: {result}")))?
.ok_or_else(|| {
SupervisorClientError::InvalidResponse(format!(
"missing payload in result: {result}"
))
})?
} else {
return Err(SupervisorClientError::TransportTimeout);
}
@@ -174,15 +192,19 @@ impl SupervisorClient {
let raw = BASE64_STANDARD
.decode(payload_field.as_bytes())
.map_err(|e| SupervisorClientError::InvalidResponse(format!("invalid base64 payload: {e}")))?;
.map_err(|e| {
SupervisorClientError::InvalidResponse(format!("invalid base64 payload: {e}"))
})?;
let rpc_resp: Value = serde_json::from_slice(&raw)?;
if let Some(err) = rpc_resp.get("error") {
return Err(SupervisorClientError::RpcError(err.to_string()));
}
let res = rpc_resp
.get("result")
.ok_or_else(|| SupervisorClientError::InvalidResponse(format!("missing result in supervisor reply: {rpc_resp}")))?;
let res = rpc_resp.get("result").ok_or_else(|| {
SupervisorClientError::InvalidResponse(format!(
"missing result in supervisor reply: {rpc_resp}"
))
})?;
Ok(res.clone())
}

View File

@@ -6,4 +6,4 @@ pub enum Destination {
Ip(IpAddr),
/// 64-hex public key of the receiver node
Pk(String),
}
}

View File

@@ -1,8 +1,8 @@
pub mod models;
pub mod storage;
pub mod service;
mod time;
pub mod dag;
pub mod rpc;
pub mod clients;
pub mod dag;
pub mod models;
pub mod router;
pub mod rpc;
pub mod service;
pub mod storage;
mod time;

View File

@@ -2,6 +2,8 @@ use clap::Parser;
use std::net::{IpAddr, SocketAddr};
use std::sync::Arc;
use tracing::{error, info};
use tracing_subscriber::EnvFilter;
#[derive(Debug, Clone, Parser)]
#[command(
name = "herocoordinator",
@@ -23,8 +25,8 @@ struct Cli {
long = "mycelium-port",
short = 'p',
env = "MYCELIUM_PORT",
default_value_t = 9651u16,
help = "Port for Mycelium JSON-RPC (default: 9651)"
default_value_t = 8990u16,
help = "Port for Mycelium JSON-RPC (default: 8990)"
)]
mycelium_port: u16,
@@ -73,6 +75,14 @@ struct Cli {
#[tokio::main]
async fn main() {
let cli = Cli::parse();
// Initialize tracing subscriber (pretty formatter; controlled by RUST_LOG)
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
tracing_subscriber::fmt()
.with_env_filter(filter)
.pretty()
.with_target(true)
.with_level(true)
.init();
let http_addr = SocketAddr::new(cli.api_http_ip, cli.api_http_port);
let ws_addr = SocketAddr::new(cli.api_ws_ip, cli.api_ws_port);
@@ -107,10 +117,7 @@ async fn main() {
let http_module = herocoordinator::rpc::build_module(state.clone());
let ws_module = herocoordinator::rpc::build_module(state.clone());
println!(
"Starting JSON-RPC servers: HTTP http://{} | WS ws://{} | redis_addr={}",
http_addr, ws_addr, cli.redis_addr
);
info!(%http_addr, %ws_addr, redis_addr=%cli.redis_addr, "Starting JSON-RPC servers");
// Start servers
let _http_handle = herocoordinator::rpc::start_http(http_addr, http_module)
@@ -122,7 +129,7 @@ async fn main() {
// Wait for Ctrl+C to terminate
if let Err(e) = tokio::signal::ctrl_c().await {
eprintln!("Failed to listen for shutdown signal: {e}");
error!(error=%e, "Failed to listen for shutdown signal");
}
println!("Shutdown signal received, exiting.");
info!("Shutdown signal received, exiting.");
}

View File

@@ -4,10 +4,11 @@ use serde_json::{Value, json};
use tokio::sync::Semaphore;
use crate::{
clients::{Destination, SupervisorClient, MyceliumClient},
clients::{Destination, MyceliumClient, SupervisorClient},
models::{Job, JobStatus, Message, MessageStatus, ScriptType, TransportStatus},
service::AppService,
};
use tracing::{error, info};
#[derive(Clone, Debug)]
pub struct RouterConfig {
@@ -40,7 +41,7 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
match MyceliumClient::new(cfg_cloned.base_url.clone()) {
Ok(c) => break Arc::new(c),
Err(e) => {
eprintln!("[router ctx={}] MyceliumClient init error: {}", ctx_id, e);
error!(context_id=ctx_id, error=%e, "MyceliumClient init error");
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
}
@@ -70,12 +71,10 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
// Ensure permit is dropped at end of task
let _permit = permit;
if let Err(e) =
deliver_one(&service_task, &cfg_task, ctx_id, &key, mycelium).await
deliver_one(&service_task, &cfg_task, ctx_id, &key, mycelium)
.await
{
eprintln!(
"[router ctx={}] delivery error for {}: {}",
ctx_id, key, e
);
error!(context_id=ctx_id, key=%key, error=%e, "Delivery error");
}
}
});
@@ -85,7 +84,7 @@ pub fn start_router(service: AppService, cfg: RouterConfig) -> Vec<tokio::task::
continue;
}
Err(e) => {
eprintln!("[router ctx={}] brpop error: {}", ctx_id, e);
error!(context_id=ctx_id, error=%e, "BRPOP error");
// small backoff to avoid busy-loop on persistent errors
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
@@ -228,50 +227,63 @@ async fn deliver_one(
// Stop on terminal states
if matches!(s, TransportStatus::Delivered | TransportStatus::Read) {
// On Read, fetch supervisor job.status and update local job/message if terminal
if matches!(s, TransportStatus::Read) {
if let Some(job_id) = job_id_opt {
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup.job_status_sync(job_id.to_string(), 10).await {
Ok(remote_status) => {
if let Some((mapped, terminal)) =
map_supervisor_job_status(&remote_status)
{
if terminal {
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
mapped.clone(),
)
.await;
if matches!(s, TransportStatus::Read)
&& let Some(job_id) = job_id_opt
{
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup.job_status_sync(job_id.to_string(), 10).await {
Ok(remote_status) => {
if let Some((mapped, terminal)) =
map_supervisor_job_status(&remote_status)
{
if terminal {
let _ = service_poll
.update_job_status_unchecked(
context_id,
caller_id,
job_id,
mapped.clone(),
)
.await;
// After terminal status, fetch supervisor job.result and store into Job.result
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup.job_result_sync(job_id.to_string(), job_result_reply_timeout).await {
Ok(result_map) => {
// Persist the result into the Job.result map (merge)
let _ = service_poll
.update_job_result_merge_unchecked(
context_id,
caller_id,
job_id,
result_map.clone(),
)
.await;
// Log which key was stored (success or error)
let key = result_map.keys().next().cloned().unwrap_or_else(|| "unknown".to_string());
let _ = service_poll
// After terminal status, fetch supervisor job.result and store into Job.result
let sup = SupervisorClient::new_with_client(
client.clone(),
sup_dest.clone(),
sup_topic.clone(),
None,
);
match sup
.job_result_sync(
job_id.to_string(),
job_result_reply_timeout,
)
.await
{
Ok(result_map) => {
// Persist the result into the Job.result map (merge)
let _ = service_poll
.update_job_result_merge_unchecked(
context_id,
caller_id,
job_id,
result_map.clone(),
)
.await;
// Log which key was stored (success or error)
let key = result_map
.keys()
.next()
.cloned()
.unwrap_or_else(|| {
"unknown".to_string()
});
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
@@ -282,9 +294,9 @@ async fn deliver_one(
)],
)
.await;
}
Err(e) => {
let _ = service_poll
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
@@ -295,19 +307,19 @@ async fn deliver_one(
)],
)
.await;
}
}
}
// Mark message as processed
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
let _ = service_poll
// Mark message as processed
let _ = service_poll
.update_message_status(
context_id,
caller_id,
id,
MessageStatus::Processed,
)
.await;
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
@@ -318,35 +330,31 @@ async fn deliver_one(
)],
)
.await;
}
} else {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"Unknown supervisor status '{}' for job {}",
remote_status, job_id
)],
)
.await;
}
}
Err(e) => {
} else {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!(
"job.status sync error: {}",
e
"Unknown supervisor status '{}' for job {}",
remote_status, job_id
)],
)
.await;
}
}
Err(e) => {
let _ = service_poll
.append_message_logs(
context_id,
caller_id,
id,
vec![format!("job.status sync error: {}", e)],
)
.await;
}
}
}
break;
@@ -436,7 +444,6 @@ fn map_supervisor_job_status(s: &str) -> Option<(JobStatus, bool)> {
}
}
/// Auto-discover contexts periodically and ensure a router loop exists for each.
/// Returns a JoinHandle of the discovery task (router loops are detached).
pub fn start_router_auto(service: AppService, cfg: RouterConfig) -> tokio::task::JoinHandle<()> {
@@ -454,12 +461,12 @@ pub fn start_router_auto(service: AppService, cfg: RouterConfig) -> tokio::task:
};
let _ = start_router(service.clone(), cfg_ctx);
active.insert(ctx_id);
eprintln!("[router] started loop for context {}", ctx_id);
info!(context_id = ctx_id, "Started loop for context");
}
}
}
Err(e) => {
eprintln!("[router] list_context_ids error: {}", e);
error!(error=%e, "list_context_ids error");
}
}
tokio::time::sleep(std::time::Duration::from_secs(5)).await;

View File

@@ -410,11 +410,7 @@ pub fn build_module(state: Arc<AppState>) -> RpcModule<()> {
let state = state.clone();
async move {
let p: ActorLoadParams = params.parse().map_err(invalid_params_err)?;
let actor = state
.service
.load_actor(p.id)
.await
.map_err(storage_err)?;
let actor = state.service.load_actor(p.id).await.map_err(storage_err)?;
Ok::<_, ErrorObjectOwned>(actor)
}
})

View File

@@ -694,7 +694,7 @@ impl AppService {
Ok(())
}
/// Bypass-permission variant to update a job status with transition validation.
/// Bypass-permission variant to update a job status with transition validation.
/// This skips the executor permission check but enforces the same state transition rules.
pub async fn update_job_status_unchecked(
&self,

View File

@@ -1,4 +1,3 @@
pub mod redis;
pub use redis::RedisDriver;

View File

@@ -7,8 +7,10 @@ use serde_json::{Map as JsonMap, Value};
use tokio::sync::Mutex;
use crate::models::{
Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner, TransportStatus,
Actor, Context, Flow, FlowStatus, Job, JobStatus, Message, MessageStatus, Runner,
TransportStatus,
};
use tracing::{error, warn};
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
@@ -52,8 +54,14 @@ impl RedisDriver {
// Slow path: create a new manager and cache it
let url = format!("{}/{}", self.base_addr.trim_end_matches('/'), db);
let client = redis::Client::open(url.as_str())?;
let cm = client.get_connection_manager().await?;
let client = redis::Client::open(url.as_str()).map_err(|e| {
error!(%url, db=%db, error=%e, "Redis client open failed");
e
})?;
let cm = client.get_connection_manager().await.map_err(|e| {
error!(%url, db=%db, error=%e, "Redis connection manager init failed");
e
})?;
let mut guard = self.managers.lock().await;
let entry = guard.entry(db).or_insert(cm);
@@ -104,21 +112,37 @@ impl RedisDriver {
async fn hset_model<T: Serialize>(&self, db: u32, key: &str, model: &T) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let pairs = Self::struct_to_hset_pairs(model)?;
let pairs = Self::struct_to_hset_pairs(model).map_err(|e| {
error!(db=%db, key=%key, error=%e, "Serialize model to HSET pairs failed");
e
})?;
// Ensure no stale fields
let _: u64 = cm.del(key).await.unwrap_or(0);
let del_res: redis::RedisResult<u64> = cm.del(key).await;
if let Err(e) = del_res {
warn!(db=%db, key=%key, error=%e, "DEL before HSET failed");
}
// Write all fields
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET multiple failed");
e
})?;
Ok(())
}
async fn hget_model<T: DeserializeOwned>(&self, db: u32, key: &str) -> Result<T> {
let mut cm = self.manager_for_db(db).await?;
let map: StdHashMap<String, String> = cm.hgetall(key).await?;
let map: StdHashMap<String, String> = cm.hgetall(key).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HGETALL failed");
e
})?;
if map.is_empty() {
// NotFound is expected in some flows; don't log as error
return Err(format!("Key not found: {}", key).into());
}
Self::hmap_to_struct(map)
Self::hmap_to_struct(map).map_err(|e| {
error!(db=%db, key=%key, error=%e, "Deserialize model from HGETALL failed");
e
})
}
// -----------------------------
@@ -196,7 +220,7 @@ impl RedisDriver {
let key = Self::actor_key(id);
self.hget_model(db, &key).await
}
/// Save an Actor globally in DB 0 (Actor is context-independent)
/// Save an Actor globally in DB 0 (Actor is context-independent)
pub async fn save_actor_global(&self, actor: &Actor) -> Result<()> {
let json = serde_json::to_value(actor)?;
let id = json
@@ -299,7 +323,10 @@ impl RedisDriver {
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_status failed");
e
})?;
Ok(())
}
@@ -345,7 +372,10 @@ impl RedisDriver {
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_status failed");
e
})?;
Ok(())
}
@@ -370,7 +400,10 @@ impl RedisDriver {
("status".to_string(), status_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_message_status failed");
e
})?;
Ok(())
}
@@ -404,7 +437,10 @@ impl RedisDriver {
let ts = crate::time::current_timestamp();
pairs.push(("updated_at".to_string(), ts.to_string()));
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_message_transport failed");
e
})?;
Ok(())
}
@@ -437,7 +473,10 @@ impl RedisDriver {
("env_vars".to_string(), env_vars_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_env_vars_merge failed");
e
})?;
Ok(())
}
@@ -470,7 +509,10 @@ impl RedisDriver {
("result".to_string(), result_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_result_merge failed");
e
})?;
Ok(())
}
@@ -504,7 +546,10 @@ impl RedisDriver {
("env_vars".to_string(), env_vars_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_env_vars_merge failed");
e
})?;
Ok(())
}
@@ -538,7 +583,10 @@ impl RedisDriver {
("result".to_string(), result_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_job_result_merge failed");
e
})?;
Ok(())
}
@@ -553,7 +601,10 @@ impl RedisDriver {
("jobs".to_string(), jobs_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET update_flow_jobs_set failed");
e
})?;
Ok(())
}
@@ -584,7 +635,10 @@ impl RedisDriver {
("logs".to_string(), logs_str),
("updated_at".to_string(), ts.to_string()),
];
let _: usize = cm.hset_multiple(key, &pairs).await?;
let _: () = cm.hset_multiple(&key, &pairs).await.map_err(|e| {
error!(db=%db, key=%key, error=%e, "HSET append_message_logs failed");
e
})?;
Ok(())
}
@@ -595,7 +649,10 @@ impl RedisDriver {
/// Push a value onto a Redis list using LPUSH in the given DB.
pub async fn lpush_list(&self, db: u32, list: &str, value: &str) -> Result<()> {
let mut cm = self.manager_for_db(db).await?;
let _: i64 = cm.lpush(list, value).await?;
let _: i64 = cm.lpush(list, value).await.map_err(|e| {
error!(db=%db, list=%list, value=%value, error=%e, "LPUSH failed");
e
})?;
Ok(())
}
@@ -615,7 +672,11 @@ impl RedisDriver {
.arg("msg_out")
.arg(timeout_secs)
.query_async(&mut cm)
.await?;
.await
.map_err(|e| {
error!(db=%db, timeout_secs=%timeout_secs, error=%e, "BRPOP failed");
e
})?;
Ok(res.map(|(_, v)| v))
}
@@ -632,7 +693,11 @@ impl RedisDriver {
.arg("COUNT")
.arg(100)
.query_async(&mut cm)
.await?;
.await
.map_err(|e| {
error!(db=%db, cursor=%cursor, error=%e, "SCAN failed");
e
})?;
for k in keys {
if let Ok(r) = self.hget_model::<Runner>(db, &k).await {
out.push(r);
@@ -653,7 +718,15 @@ impl RedisDriver {
/// Register a context id in the global set "contexts" stored in DB 0.
pub async fn register_context_id(&self, id: u32) -> Result<()> {
let mut cm = self.manager_for_db(0).await?;
let _: i64 = redis::cmd("SADD").arg("contexts").arg(id).query_async(&mut cm).await?;
let _: i64 = redis::cmd("SADD")
.arg("contexts")
.arg(id)
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=0, context_id=%id, error=%e, "SADD contexts failed");
e
})?;
Ok(())
}
@@ -661,7 +734,14 @@ impl RedisDriver {
pub async fn list_context_ids(&self) -> Result<Vec<u32>> {
let mut cm = self.manager_for_db(0).await?;
// Using SMEMBERS and parsing into u32
let vals: Vec<String> = redis::cmd("SMEMBERS").arg("contexts").query_async(&mut cm).await?;
let vals: Vec<String> = redis::cmd("SMEMBERS")
.arg("contexts")
.query_async(&mut cm)
.await
.map_err(|e| {
error!(db=0, error=%e, "SMEMBERS contexts failed");
e
})?;
let mut out = Vec::with_capacity(vals.len());
for v in vals {
if let Ok(n) = v.parse::<u32>() {