supervisor cleanup, documentation and minor fixes

This commit is contained in:
Timur Gordon
2025-11-14 01:47:13 +01:00
parent 94a66d9af4
commit d2ff7835e2
46 changed files with 520 additions and 4746 deletions

15
docs/architecture.md Normal file
View File

@@ -0,0 +1,15 @@
# Architecture
The Horus architecture consists of three layers:
1. Coordinator: A workflow engine that executes DAG-based flows by sending ready job steps to the targeted supervisors.
2. Supervisor: A job dispatcher that routes jobs to the appropriate runners.
3. Runner: A job executor that runs the actual job steps.
## Networking
- The user / client talks to the coordinator over an OpenRPC interface, using either regular HTTP transport or Mycelium.
- The coordinator talks to the supervisor over an OpenRPC interface, using either regular HTTP transport or Mycelium.
- The supervisor talks to runners over a Redis-based job execution protocol.

6
docs/glossary.md Normal file
View File

@@ -0,0 +1,6 @@
# Terminology
- Flow: A workflow that is executed by the coordinator.
- Job: A unit of work that is executed by a runner.
- Supervisor: A job dispatcher that routes jobs to the appropriate runners.
- Runner: A job executor that runs the actual job steps.

28
docs/supervisor/auth.md Normal file
View File

@@ -0,0 +1,28 @@
## Supervisor Authentication
The supervisor has two authentication systems:
1. An authentication system based on scoped symmetric API keys.
2. An authentication system based on signatures over a job's canonical representation.
The first controls access to the supervisor API. The second authenticates the signatories of a job, so that runners can implement access control based on those signatories.
#### API Key Management
API keys are used to authenticate requests to the supervisor. They are created using the `auth.key.create` method and can be listed using the `key.list` method.
#### API Key Scopes
API keys have a scope that determines what actions they can perform. The following scopes are available:
- `admin`: Full access to all supervisor methods.
- `registrar`: Access to methods related to job registration and management.
- `user`: Access to methods related to job execution and management.
#### API Key Usage
API keys are passed in the `Authorization` header of the request, using the format `Bearer <key>`.
#### API Key Rotation
API keys can be rotated using the `key.remove` method. This will invalidate the old key and create a new one.

View File

@@ -0,0 +1,391 @@
{
"openrpc": "1.3.2",
"info": {
"title": "Hero Supervisor OpenRPC API",
"version": "1.0.0",
"description": "OpenRPC API for managing Hero Supervisor runners and jobs. Job operations follow the convention: 'jobs.' for general operations and 'job.' for specific job operations."
},
"components": {
"schemas": {
"Job": {
"type": "object",
"properties": {
"id": { "type": "string" },
"caller_id": { "type": "string" },
"context_id": { "type": "string" },
"payload": { "type": "string" },
"runner": { "type": "string" },
"executor": { "type": "string" },
"timeout": { "type": "number" },
"env_vars": { "type": "object" },
"created_at": { "type": "string" },
"updated_at": { "type": "string" }
},
"required": ["id", "caller_id", "context_id", "payload", "runner", "executor", "timeout", "env_vars", "created_at", "updated_at"]
}
}
},
"methods": [
{
"name": "list_runners",
"description": "List all registered runners",
"params": [],
"result": {
"name": "runners",
"schema": {
"type": "array",
"items": { "type": "string" }
}
}
},
{
"name": "register_runner",
"description": "Register a new runner to the supervisor with secret authentication",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"name": { "type": "string" },
"queue": { "type": "string" }
},
"required": ["secret", "name", "queue"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "jobs.create",
"description": "Create a new job without queuing it to a runner",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job": {
"$ref": "#/components/schemas/Job"
}
},
"required": ["secret", "job"]
}
}
],
"result": {
"name": "job_id",
"schema": { "type": "string" }
}
},
{
"name": "jobs.list",
"description": "List all jobs",
"params": [],
"result": {
"name": "jobs",
"schema": {
"type": "array",
"items": { "$ref": "#/components/schemas/Job" }
}
}
},
{
"name": "job.run",
"description": "Run a job on the appropriate runner and return the result",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job": {
"$ref": "#/components/schemas/Job"
}
},
"required": ["secret", "job"]
}
}
],
"result": {
"name": "result",
"schema": {
"oneOf": [
{
"type": "object",
"properties": {
"success": { "type": "string" }
},
"required": ["success"]
},
{
"type": "object",
"properties": {
"error": { "type": "string" }
},
"required": ["error"]
}
]
}
}
},
{
"name": "job.start",
"description": "Start a previously created job by queuing it to its assigned runner",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "job.status",
"description": "Get the current status of a job",
"params": [
{
"name": "job_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "status",
"schema": {
"type": "object",
"properties": {
"job_id": { "type": "string" },
"status": {
"type": "string",
"enum": ["created", "queued", "running", "completed", "failed", "timeout"]
},
"created_at": { "type": "string" },
"started_at": { "type": ["string", "null"] },
"completed_at": { "type": ["string", "null"] }
},
"required": ["job_id", "status", "created_at"]
}
}
},
{
"name": "job.result",
"description": "Get the result of a completed job (blocks until result is available)",
"params": [
{
"name": "job_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "result",
"schema": {
"oneOf": [
{
"type": "object",
"properties": {
"success": { "type": "string" }
},
"required": ["success"]
},
{
"type": "object",
"properties": {
"error": { "type": "string" }
},
"required": ["error"]
}
]
}
}
},
{
"name": "remove_runner",
"description": "Remove a runner from the supervisor",
"params": [
{
"name": "actor_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "start_runner",
"description": "Start a specific runner",
"params": [
{
"name": "actor_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "stop_runner",
"description": "Stop a specific runner",
"params": [
{
"name": "actor_id",
"schema": { "type": "string" }
},
{
"name": "force",
"schema": { "type": "boolean" }
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "get_runner_status",
"description": "Get the status of a specific runner",
"params": [
{
"name": "actor_id",
"schema": { "type": "string" }
}
],
"result": {
"name": "status",
"schema": { "type": "object" }
}
},
{
"name": "get_all_runner_status",
"description": "Get status of all runners",
"params": [],
"result": {
"name": "statuses",
"schema": {
"type": "array",
"items": { "type": "object" }
}
}
},
{
"name": "start_all",
"description": "Start all runners",
"params": [],
"result": {
"name": "results",
"schema": {
"type": "array",
"items": {
"type": "array",
"items": { "type": "string" }
}
}
}
},
{
"name": "stop_all",
"description": "Stop all runners",
"params": [
{
"name": "force",
"schema": { "type": "boolean" }
}
],
"result": {
"name": "results",
"schema": {
"type": "array",
"items": {
"type": "array",
"items": { "type": "string" }
}
}
}
},
{
"name": "get_all_status",
"description": "Get status of all runners (alternative format)",
"params": [],
"result": {
"name": "statuses",
"schema": {
"type": "array",
"items": {
"type": "array",
"items": { "type": "string" }
}
}
}
},
{
"name": "job.stop",
"description": "Stop a running job",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "job.delete",
"description": "Delete a job from the system",
"params": [
{
"name": "params",
"schema": {
"type": "object",
"properties": {
"secret": { "type": "string" },
"job_id": { "type": "string" }
},
"required": ["secret", "job_id"]
}
}
],
"result": {
"name": "result",
"schema": { "type": "null" }
}
},
{
"name": "rpc.discover",
"description": "OpenRPC discovery method - returns the OpenRPC document describing this API",
"params": [],
"result": {
"name": "openrpc_document",
"schema": { "type": "object" }
}
}
]
}