supervisor cleanup, documentation and minor fixes
This commit is contained in:
15
docs/architecture.md
Normal file
15
docs/architecture.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Architecture
|
||||
|
||||
The Horus architecture consists of three layers:
|
||||
|
||||
1. Coordinator: A workflow engine that executes DAG-based flows by sending ready job steps to the targeted supervisors.
|
||||
2. Supervisor: A job dispatcher that routes jobs to the appropriate runners.
|
||||
3. Runner: A job executor that runs the actual job steps.
|
||||
|
||||
## Networking
|
||||
|
||||
- The user / client talks to the coordinator over an OpenRPC interface, using either regular HTTP transport or Mycelium.
|
||||
- The coordinator talks to the supervisor over an OpenRPC interface, using either regular HTTP transport or Mycelium.
|
||||
- The supervisor talks to runners over a Redis-based job execution protocol.
|
||||
|
||||
|
||||
6
docs/glossary.md
Normal file
6
docs/glossary.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# Terminology
|
||||
|
||||
- Flow: A workflow that is executed by the coordinator.
|
||||
- Job: A unit of work that is executed by a runner.
|
||||
- Supervisor: A job dispatcher that routes jobs to the appropriate runners.
|
||||
- Runner: A job executor that runs the actual job steps.
|
||||
28
docs/supervisor/auth.md
Normal file
28
docs/supervisor/auth.md
Normal file
@@ -0,0 +1,28 @@
|
||||
## Supervisor Authentication
|
||||
|
||||
The supervisor has two authentication systems:
|
||||
|
||||
1. An authentication system based on scoped symmetric API keys.
|
||||
2. A signature verification system that checks the signatures over a job's canonical representation.
|
||||
|
||||
The first controls access to the supervisor API; the second authenticates the signatories of a job, so that runners can implement access control based on who signed it.
|
||||
|
||||
#### API Key Management
|
||||
|
||||
API keys are used to authenticate requests to the supervisor. They are created using the `auth.key.create` method and can be listed using the `key.list` method.
|
||||
|
||||
#### API Key Scopes
|
||||
|
||||
API keys have a scope that determines what actions they can perform. The following scopes are available:
|
||||
|
||||
- `admin`: Full access to all supervisor methods.
|
||||
- `registrar`: Access to methods related to job registration and management.
|
||||
- `user`: Access to methods related to job execution and management.
|
||||
|
||||
#### API Key Usage
|
||||
|
||||
API keys are passed in the `Authorization` header of the request. The format is `Bearer <key>`.
|
||||
|
||||
#### API Key Rotation
|
||||
|
||||
API keys can be rotated by removing the old key with the `key.remove` method, which invalidates it, and then creating a new key to replace it.
|
||||
391
docs/supervisor/openrpc.json
Normal file
391
docs/supervisor/openrpc.json
Normal file
@@ -0,0 +1,391 @@
|
||||
{
|
||||
"openrpc": "1.3.2",
|
||||
"info": {
|
||||
"title": "Hero Supervisor OpenRPC API",
|
||||
"version": "1.0.0",
|
||||
"description": "OpenRPC API for managing Hero Supervisor runners and jobs. Job operations follow the convention: 'jobs.' for general operations and 'job.' for specific job operations."
|
||||
},
|
||||
"components": {
|
||||
"schemas": {
|
||||
"Job": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": { "type": "string" },
|
||||
"caller_id": { "type": "string" },
|
||||
"context_id": { "type": "string" },
|
||||
"payload": { "type": "string" },
|
||||
"runner": { "type": "string" },
|
||||
"executor": { "type": "string" },
|
||||
"timeout": { "type": "number" },
|
||||
"env_vars": { "type": "object" },
|
||||
"created_at": { "type": "string" },
|
||||
"updated_at": { "type": "string" }
|
||||
},
|
||||
"required": ["id", "caller_id", "context_id", "payload", "runner", "executor", "timeout", "env_vars", "created_at", "updated_at"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"methods": [
|
||||
{
|
||||
"name": "list_runners",
|
||||
"description": "List all registered runners",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "runners",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "register_runner",
|
||||
"description": "Register a new runner with the supervisor, authenticating with a secret",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"name": { "type": "string" },
|
||||
"queue": { "type": "string" }
|
||||
},
|
||||
"required": ["secret", "name", "queue"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "jobs.create",
|
||||
"description": "Create a new job without queuing it to a runner",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"job": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
},
|
||||
"required": ["secret", "job"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "job_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "jobs.list",
|
||||
"description": "List all jobs",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "jobs",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/components/schemas/Job" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.run",
|
||||
"description": "Run a job on the appropriate runner and return the result",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"job": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
},
|
||||
"required": ["secret", "job"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"success": { "type": "string" }
|
||||
},
|
||||
"required": ["success"]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"error": { "type": "string" }
|
||||
},
|
||||
"required": ["error"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.start",
|
||||
"description": "Start a previously created job by queuing it to its assigned runner",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"job_id": { "type": "string" }
|
||||
},
|
||||
"required": ["secret", "job_id"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.status",
|
||||
"description": "Get the current status of a job",
|
||||
"params": [
|
||||
{
|
||||
"name": "job_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "status",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"job_id": { "type": "string" },
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": ["created", "queued", "running", "completed", "failed", "timeout"]
|
||||
},
|
||||
"created_at": { "type": "string" },
|
||||
"started_at": { "type": ["string", "null"] },
|
||||
"completed_at": { "type": ["string", "null"] }
|
||||
},
|
||||
"required": ["job_id", "status", "created_at"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.result",
|
||||
"description": "Get the result of a completed job (blocks until result is available)",
|
||||
"params": [
|
||||
{
|
||||
"name": "job_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"success": { "type": "string" }
|
||||
},
|
||||
"required": ["success"]
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"error": { "type": "string" }
|
||||
},
|
||||
"required": ["error"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "remove_runner",
|
||||
"description": "Remove a runner from the supervisor",
|
||||
"params": [
|
||||
{
|
||||
"name": "actor_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "start_runner",
|
||||
"description": "Start a specific runner",
|
||||
"params": [
|
||||
{
|
||||
"name": "actor_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "stop_runner",
|
||||
"description": "Stop a specific runner",
|
||||
"params": [
|
||||
{
|
||||
"name": "actor_id",
|
||||
"schema": { "type": "string" }
|
||||
},
|
||||
{
|
||||
"name": "force",
|
||||
"schema": { "type": "boolean" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_runner_status",
|
||||
"description": "Get the status of a specific runner",
|
||||
"params": [
|
||||
{
|
||||
"name": "actor_id",
|
||||
"schema": { "type": "string" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "status",
|
||||
"schema": { "type": "object" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_all_runner_status",
|
||||
"description": "Get status of all runners",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "statuses",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": { "type": "object" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "start_all",
|
||||
"description": "Start all runners",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "results",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "stop_all",
|
||||
"description": "Stop all runners",
|
||||
"params": [
|
||||
{
|
||||
"name": "force",
|
||||
"schema": { "type": "boolean" }
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "results",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_all_status",
|
||||
"description": "Get status of all runners (alternative format)",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "statuses",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.stop",
|
||||
"description": "Stop a running job",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"job_id": { "type": "string" }
|
||||
},
|
||||
"required": ["secret", "job_id"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job.delete",
|
||||
"description": "Delete a job from the system",
|
||||
"params": [
|
||||
{
|
||||
"name": "params",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"secret": { "type": "string" },
|
||||
"job_id": { "type": "string" }
|
||||
},
|
||||
"required": ["secret", "job_id"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"result": {
|
||||
"name": "result",
|
||||
"schema": { "type": "null" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "rpc.discover",
|
||||
"description": "OpenRPC discovery method - returns the OpenRPC document describing this API",
|
||||
"params": [],
|
||||
"result": {
|
||||
"name": "openrpc_document",
|
||||
"schema": { "type": "object" }
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user