feat: Implement container networking and improve lifecycle

- Add thread-safe network management for containers
- Implement graceful and forceful container stopping
- Enhance container creation and deletion logic
- Refine image management and metadata handling
- Add container name validation for security
This commit is contained in:
Mahmoud-Emad
2025-11-12 10:38:39 +02:00
parent 1d67522937
commit 7836a48ad4
8 changed files with 806 additions and 104 deletions

View File

@@ -8,25 +8,47 @@ import time
import incubaid.herolib.builder
import json
// Container lifecycle timeout constants
//
// All values are in milliseconds and are consumed by Container.start()/stop():
// stop() polls every stop_check_interval_ms for up to sigterm_timeout_ms before
// escalating from SIGTERM to SIGKILL.
const cleanup_retry_delay_ms = 500 // Time to wait for filesystem cleanup to complete
const sigterm_timeout_ms = 5000 // Time to wait for graceful shutdown (5 seconds)
const sigkill_wait_ms = 500 // Time to wait after SIGKILL
const stop_check_interval_ms = 500 // Interval to check if container stopped
// Container represents a running or stopped OCI container managed by crun
//
// Thread Safety:
// Container operations that interact with network configuration (start, stop, delete)
// are thread-safe because they delegate to HeroPods.network_* methods which use
// the network_mutex for protection.
@[heap]
pub struct Container {
pub mut:
name string
node ?&builder.Node
tmux_pane ?&tmux.Pane
crun_config ?&crun.CrunConfig
factory &HeroPods
name string // Unique container name
node ?&builder.Node // Builder node for executing commands inside container
tmux_pane ?&tmux.Pane // Optional tmux pane for interactive access
crun_config ?&crun.CrunConfig // OCI runtime configuration
factory &HeroPods // Reference to parent HeroPods instance
}
// Struct to parse JSON output of `crun state`
// CrunState represents the JSON output of `crun state` command
struct CrunState {
id string
status string
pid int
bundle string
created string
id string // Container ID
status string // Container status (running, stopped, paused)
pid int // PID of container init process
bundle string // Path to OCI bundle
created string // Creation timestamp
}
// Start the container
//
// This method handles the complete container startup lifecycle:
// 1. Creates the container in crun if it doesn't exist
// 2. Handles leftover state cleanup if creation fails
// 3. Starts the container process
// 4. Sets up networking (thread-safe via network_mutex)
//
// Thread Safety:
// Network setup is thread-safe via HeroPods.network_setup_container()
pub fn (mut self Container) start() ! {
// Check if container exists in crun
container_exists := self.container_exists_in_crun()!
@@ -37,7 +59,7 @@ pub fn (mut self Container) start() ! {
// Try to create the container, if it fails with "File exists" error,
// try to force delete any leftover state and retry
crun_root := '${self.factory.base_dir}/runtime'
create_result := osal.exec(
_ := osal.exec(
cmd: 'crun --root ${crun_root} create --bundle ${self.factory.base_dir}/configs/${self.name} ${self.name}'
stdout: true
) or {
@@ -50,7 +72,7 @@ pub fn (mut self Container) start() ! {
osal.exec(cmd: 'rm -rf ${crun_root}/${self.name}', stdout: false) or {}
osal.exec(cmd: 'rm -rf /run/crun/${self.name}', stdout: false) or {}
// Wait a moment for cleanup to complete
time.sleep(500 * time.millisecond)
time.sleep(cleanup_retry_delay_ms * time.millisecond)
// Retry creation
osal.exec(
cmd: 'crun --root ${crun_root} create --bundle ${self.factory.base_dir}/configs/${self.name} ${self.name}'
@@ -84,10 +106,34 @@ pub fn (mut self Container) start() ! {
// start the container (crun start doesn't have --detach flag)
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} start ${self.name}', stdout: true)!
// Start the container
osal.exec(cmd: 'crun --root ${crun_root} start ${self.name}', stdout: true) or {
return error('Failed to start container: ${err}')
}
// Setup network for the container (thread-safe)
// If this fails, stop the container to clean up
self.setup_network() or {
console.print_stderr('Network setup failed, stopping container: ${err}')
// Use stop() method to properly clean up (kills process, cleans network, etc.)
// Ignore errors from stop since we're already in an error path
self.stop() or { console.print_debug('Failed to stop container during cleanup: ${err}') }
return error('Failed to setup network for container: ${err}')
}
console.print_green('Container ${self.name} started')
}
// Stop the container gracefully (SIGTERM) or forcefully (SIGKILL)
//
// This method:
// 1. Sends SIGTERM for graceful shutdown
// 2. Waits up to sigterm_timeout_ms for graceful stop
// 3. Sends SIGKILL if still running after timeout
// 4. Cleans up network resources (thread-safe)
//
// Thread Safety:
// Network cleanup is thread-safe via HeroPods.network_cleanup_container()
pub fn (mut self Container) stop() ! {
status := self.status()!
if status == .stopped {
@@ -96,28 +142,90 @@ pub fn (mut self Container) stop() ! {
}
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGTERM', stdout: false) or {}
time.sleep(2 * time.second)
// Force kill if still running
if self.status()! == .running {
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGKILL', stdout: false) or {}
// Send SIGTERM for graceful shutdown
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGTERM', stdout: false) or {
console.print_debug('Failed to send SIGTERM (container may already be stopped): ${err}')
}
// Wait up to sigterm_timeout_ms for graceful shutdown
mut attempts := 0
max_attempts := sigterm_timeout_ms / stop_check_interval_ms
for attempts < max_attempts {
time.sleep(stop_check_interval_ms * time.millisecond)
current_status := self.status() or {
// If we can't get status, assume it's stopped (container may have been deleted)
ContainerStatus.stopped
}
if current_status == .stopped {
console.print_debug('Container ${self.name} stopped gracefully')
self.cleanup_network()! // Thread-safe network cleanup
console.print_green('Container ${self.name} stopped')
return
}
attempts++
}
// Force kill if still running after timeout
console.print_debug('Container ${self.name} did not stop gracefully, force killing')
osal.exec(cmd: 'crun --root ${crun_root} kill ${self.name} SIGKILL', stdout: false) or {
console.print_debug('Failed to send SIGKILL: ${err}')
}
// Wait for SIGKILL to take effect
time.sleep(sigkill_wait_ms * time.millisecond)
// Verify it's actually stopped
final_status := self.status() or {
// If we can't get status, assume it's stopped (container may have been deleted)
ContainerStatus.stopped
}
if final_status != .stopped {
return error('Failed to stop container ${self.name} - status: ${final_status}')
}
// Cleanup network resources (thread-safe)
self.cleanup_network()!
console.print_green('Container ${self.name} stopped')
}
// Delete the container
//
// This method:
// 1. Checks if container exists in crun
// 2. Stops the container (which cleans up network)
// 3. Deletes the container from crun
// 4. Removes from factory's container cache
//
// Thread Safety:
// Network cleanup is thread-safe via stop() -> cleanup_network()
pub fn (mut self Container) delete() ! {
// Check if container exists before trying to delete
if !self.container_exists_in_crun()! {
console.print_debug('Container ${self.name} does not exist, nothing to delete')
console.print_debug('Container ${self.name} does not exist in crun')
// Still cleanup network resources in case they exist (thread-safe)
self.cleanup_network() or {
console.print_debug('Network cleanup failed (may not exist): ${err}')
}
// Remove from factory's container cache only after all cleanup is done
if self.name in self.factory.containers {
self.factory.containers.delete(self.name)
}
console.print_debug('Container ${self.name} removed from cache')
return
}
// Stop the container (this will cleanup network via stop())
self.stop()!
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false) or {}
// Remove from factory's container cache
// Delete the container from crun
crun_root := '${self.factory.base_dir}/runtime'
osal.exec(cmd: 'crun --root ${crun_root} delete ${self.name}', stdout: false) or {
console.print_debug('Failed to delete container from crun: ${err}')
}
// Remove from factory's container cache only after all cleanup is complete
if self.name in self.factory.containers {
self.factory.containers.delete(self.name)
}
@@ -135,24 +243,92 @@ pub fn (mut self Container) exec(cmd_ osal.Command) !string {
// Use the builder node to execute inside container
mut node := self.node()!
console.print_debug('Executing command in container ${self.name}: ${cmd_.cmd}')
return node.exec(cmd: cmd_.cmd, stdout: cmd_.stdout)
// Execute and provide better error context
return node.exec(cmd: cmd_.cmd, stdout: cmd_.stdout) or {
// Check if container still exists to provide better error message
if !self.container_exists_in_crun()! {
return error('Container ${self.name} was deleted during command execution')
}
return error('Command execution failed in container ${self.name}: ${err}')
}
}
pub fn (self Container) status() !ContainerStatus {
crun_root := '${self.factory.base_dir}/runtime'
result := osal.exec(cmd: 'crun --root ${crun_root} state ${self.name}', stdout: false) or {
return .unknown
// Container doesn't exist - this is expected in some cases (e.g., before creation)
// Check error message to distinguish between "not found" and real errors
err_msg := err.msg().to_lower()
if err_msg.contains('does not exist') || err_msg.contains('not found')
|| err_msg.contains('no such') {
return .stopped
}
// Real error (permissions, crun not installed, etc.) - propagate it
return error('Failed to get container status: ${err}')
}
// Parse JSON output from crun state
state := json.decode(CrunState, result.output) or { return .unknown }
return match state.status {
'running' { .running }
'stopped' { .stopped }
'paused' { .paused }
else { .unknown }
state := json.decode(CrunState, result.output) or {
return error('Failed to parse container state JSON: ${err}')
}
status_result := match state.status {
'running' {
ContainerStatus.running
}
'stopped' {
ContainerStatus.stopped
}
'paused' {
ContainerStatus.paused
}
else {
console.print_debug('Unknown container status: ${state.status}')
ContainerStatus.unknown
}
}
return status_result
}
// Get the PID of the container's init process
//
// Queries `crun state` and parses its JSON output.
// Returns an error if the state query fails, the JSON cannot be decoded,
// or the reported PID is 0 (container not running).
pub fn (self Container) pid() !int {
	crun_root := '${self.factory.base_dir}/runtime'
	state_job := osal.exec(
		cmd: 'crun --root ${crun_root} state ${self.name}'
		stdout: false
	)!
	// Decode the crun state JSON to extract the init PID
	parsed := json.decode(CrunState, state_job.output)!
	if parsed.pid == 0 {
		return error('Container ${self.name} has no PID (not running?)')
	}
	return parsed.pid
}
// Setup network for this container (thread-safe)
//
// Resolves the container's init PID, then delegates to
// HeroPods.network_setup_container(), which holds network_mutex for
// thread-safe IP allocation and network configuration.
fn (mut self Container) setup_network() ! {
	// The init PID is required by the factory's network setup
	init_pid := self.pid()!
	mut pods := self.factory
	pods.network_setup_container(self.name, init_pid)!
}
// Cleanup network for this container (thread-safe)
//
// Delegates to HeroPods.network_cleanup_container(), which holds
// network_mutex for thread-safe IP deallocation and network teardown.
fn (mut self Container) cleanup_network() ! {
	mut pods := self.factory
	pods.network_cleanup_container(self.name)!
}
// Check if container exists in crun (regardless of its state)
@@ -167,11 +343,12 @@ fn (self Container) container_exists_in_crun() !bool {
return result.exit_code == 0
}
// ContainerStatus represents the current state of a container
pub enum ContainerStatus {
running
stopped
paused
unknown
running // Container is running
stopped // Container is stopped or doesn't exist
paused // Container is paused
unknown // Unknown status (error case)
}
// Get CPU usage in percentage

View File

@@ -6,25 +6,44 @@ import incubaid.herolib.virt.crun
import incubaid.herolib.installers.virt.herorunner as herorunner_installer
import os
// Updated enum to be more flexible
// ContainerImageType defines the available container base images
pub enum ContainerImageType {
alpine_3_20
ubuntu_24_04
ubuntu_25_04
custom // For custom images downloaded via podman
alpine_3_20 // Alpine Linux 3.20
ubuntu_24_04 // Ubuntu 24.04 LTS
ubuntu_25_04 // Ubuntu 25.04
custom // Custom image downloaded via podman
}
// ContainerNewArgs defines parameters for creating a new container
@[params]
pub struct ContainerNewArgs {
pub:
name string @[required]
image ContainerImageType = .alpine_3_20
name string @[required] // Unique container name
image ContainerImageType = .alpine_3_20 // Base image type
custom_image_name string // Used when image = .custom
docker_url string // Docker image URL for new images
reset bool
reset bool // Reset if container already exists
}
// Create a new container
//
// This method:
// 1. Validates the container name
// 2. Determines the image to use (built-in or custom)
// 3. Creates crun configuration
// 4. Installs hero binary in rootfs
// 5. Configures DNS in rootfs
//
// Note: The actual container creation in crun happens when start() is called.
// This method only prepares the configuration and rootfs.
//
// Thread Safety:
// This method doesn't interact with network_config, so no mutex is needed.
// Network setup happens later in container.start().
pub fn (mut self HeroPods) container_new(args ContainerNewArgs) !&Container {
// Validate container name to prevent shell injection and path traversal
validate_container_name(args.name) or { return error('Invalid container name: ${err}') }
if args.name in self.containers && !args.reset {
return self.containers[args.name] or { panic('bug: container should exist') }
}
@@ -85,13 +104,22 @@ pub fn (mut self HeroPods) container_new(args ContainerNewArgs) !&Container {
self.containers[args.name] = container
// Always install hero binary in container rootfs
// Install hero binary in container rootfs
self.install_hero_in_rootfs(rootfs_path)!
// Configure DNS in container rootfs (uses network_config but doesn't modify it)
self.network_configure_dns(args.name, rootfs_path)!
return container
}
// Create crun configuration using the crun module
// Create crun configuration for a container
//
// This creates an OCI-compliant runtime configuration with:
// - No terminal (background container)
// - Long-running sleep process
// - Standard environment variables
// - Resource limits
fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path string) !&crun.CrunConfig {
// Create crun configuration using the factory pattern
mut config := crun.new(mut self.crun_configs, name: container_name)!
@@ -107,7 +135,7 @@ fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path str
config.set_hostname('container')
config.set_no_new_privileges(true)
// Add the specific rlimit for file descriptors
// Add resource limits
config.add_rlimit(.rlimit_nofile, 1024, 1024)
// Validate the configuration
@@ -123,7 +151,13 @@ fn (mut self HeroPods) create_crun_config(container_name string, rootfs_path str
return config
}
// Use podman to pull image and extract rootfs
// Pull a Docker image using podman and extract its rootfs
//
// This method:
// 1. Pulls the image from Docker registry
// 2. Creates a temporary container from the image
// 3. Exports the container filesystem to rootfs_path
// 4. Cleans up the temporary container
fn (self HeroPods) podman_pull_and_export(docker_url string, image_name string, rootfs_path string) ! {
// Pull image
osal.exec(
@@ -156,8 +190,12 @@ fn (self HeroPods) podman_pull_and_export(docker_url string, image_name string,
}
// Install hero binary into container rootfs
// This copies the hero binary from the host into the container's rootfs
// If the hero binary doesn't exist on the host, it will be compiled first
//
// This method:
// 1. Checks if hero binary already exists in rootfs
// 2. If not, copies from host (~/hero/bin/hero)
// 3. If host binary doesn't exist, compiles it first
// 4. Makes the binary executable
fn (mut self HeroPods) install_hero_in_rootfs(rootfs_path string) ! {
console.print_debug('Installing hero binary into container rootfs: ${rootfs_path}')

View File

@@ -5,40 +5,57 @@ import incubaid.herolib.osal.core as osal
import incubaid.herolib.core.texttools
import os
// ContainerImage represents a container base image with its rootfs
//
// Thread Safety:
// Image operations are filesystem-based and don't interact with network_config,
// so no special thread safety considerations are needed.
@[heap]
pub struct ContainerImage {
pub mut:
image_name string @[required] // image is located in ${self.factory.base_dir}/images/<image_name>/rootfs
docker_url string // optional docker image URL
rootfs_path string // path to the extracted rootfs
size_mb f64 // size in MB
created_at string // creation timestamp
factory &HeroPods @[skip; str: skip]
image_name string @[required] // Image name (located in ${self.factory.base_dir}/images/<image_name>/rootfs)
docker_url string // Optional Docker registry URL
rootfs_path string // Path to the extracted rootfs
size_mb f64 // Size in MB
created_at string // Creation timestamp
factory &HeroPods @[skip; str: skip] // Reference to parent HeroPods instance
}
// ContainerImageArgs defines parameters for creating/managing container images
@[params]
pub struct ContainerImageArgs {
pub mut:
image_name string @[required] // image is located in ${self.factory.base_dir}/images/<image_name>/rootfs
docker_url string // docker image URL like "alpine:3.20" or "ubuntu:24.04"
reset bool
image_name string @[required] // Unique image name (located in ${self.factory.base_dir}/images/<image_name>/rootfs)
docker_url string // Docker image URL like "alpine:3.20" or "ubuntu:24.04"
reset bool // Reset if image already exists
}
// ImageExportArgs defines parameters for exporting an image
@[params]
pub struct ImageExportArgs {
pub mut:
dest_path string @[required] // destination .tgz file path
compress_level int = 6 // compression level 1-9
dest_path string @[required] // Destination .tgz file path
compress_level int = 6 // Compression level 1-9
}
// ImageImportArgs defines parameters for importing an image
@[params]
pub struct ImageImportArgs {
pub mut:
source_path string @[required] // source .tgz file path
reset bool // overwrite if exists
source_path string @[required] // Source .tgz file path
reset bool // Overwrite if exists
}
// Create new image or get existing
// Create a new image or get existing image
//
// This method:
// 1. Normalizes the image name
// 2. Returns existing image if found (unless reset=true)
// 3. Downloads image from Docker registry if docker_url provided
// 4. Creates image metadata and stores in cache
//
// Thread Safety:
// Image operations are filesystem-based and don't interact with network_config.
pub fn (mut self HeroPods) image_new(args ContainerImageArgs) !&ContainerImage {
mut image_name := texttools.name_fix(args.image_name)
rootfs_path := '${self.base_dir}/images/${image_name}/rootfs'
@@ -77,7 +94,13 @@ pub fn (mut self HeroPods) image_new(args ContainerImageArgs) !&ContainerImage {
return image
}
// Download image from docker registry using podman
// Download image from Docker registry using podman
//
// This method:
// 1. Pulls the image from Docker registry
// 2. Creates a temporary container
// 3. Exports the rootfs to the images directory
// 4. Cleans up the temporary container
fn (mut self ContainerImage) download_from_docker(docker_url string, reset bool) ! {
console.print_header('Downloading image: ${docker_url}')
@@ -119,12 +142,14 @@ fn (mut self ContainerImage) download_from_docker(docker_url string, reset bool)
}
// Update image metadata (size, creation time, etc.)
//
// Calculates the rootfs size and records creation timestamp
fn (mut self ContainerImage) update_metadata() ! {
if !os.is_dir(self.rootfs_path) {
return error('Rootfs path does not exist: ${self.rootfs_path}')
}
// Calculate size
// Calculate size in MB
result := osal.exec(cmd: 'du -sm ${self.rootfs_path}', stdout: false)!
result_parts := result.output.split_by_space()[0] or { panic('bug') }
size_str := result_parts.trim_space()
@@ -132,10 +157,12 @@ fn (mut self ContainerImage) update_metadata() ! {
// Get creation time
info := os.stat(self.rootfs_path) or { return error('stat failed: ${err}') }
self.created_at = info.ctime.str() // or mtime.str(), depending on what you want
self.created_at = info.ctime.str()
}
// List all available images
//
// Scans the images directory and returns all found images with metadata
pub fn (mut self HeroPods) images_list() ![]&ContainerImage {
mut images := []&ContainerImage{}
@@ -173,6 +200,8 @@ pub fn (mut self HeroPods) images_list() ![]&ContainerImage {
}
// Export image to .tgz file
//
// Creates a compressed tarball of the image rootfs
pub fn (mut self ContainerImage) export(args ImageExportArgs) ! {
if !os.is_dir(self.rootfs_path) {
return error('Image rootfs not found: ${self.rootfs_path}')
@@ -192,6 +221,8 @@ pub fn (mut self ContainerImage) export(args ImageExportArgs) ! {
}
// Import image from .tgz file
//
// Extracts a compressed tarball into the images directory and creates image metadata
pub fn (mut self HeroPods) image_import(args ImageImportArgs) !&ContainerImage {
if !os.exists(args.source_path) {
return error('Source file not found: ${args.source_path}')
@@ -238,6 +269,8 @@ pub fn (mut self HeroPods) image_import(args ImageImportArgs) !&ContainerImage {
}
// Delete image
//
// Removes the image directory and removes from factory cache
pub fn (mut self ContainerImage) delete() ! {
console.print_header('Deleting image: ${self.image_name}')
@@ -255,6 +288,8 @@ pub fn (mut self ContainerImage) delete() ! {
}
// Get image info as map
//
// Returns image metadata as a string map for display/serialization
pub fn (self ContainerImage) info() map[string]string {
return {
'name': self.image_name
@@ -265,7 +300,9 @@ pub fn (self ContainerImage) info() map[string]string {
}
}
// List available docker images that can be downloaded
// List available Docker images that can be downloaded
//
// Returns a curated list of commonly used Docker images
pub fn list_available_docker_images() []string {
return [
'alpine:3.20',

View File

@@ -4,6 +4,14 @@ import incubaid.herolib.core.base
import incubaid.herolib.core.playbook { PlayBook }
import json
// Global state for HeroPods instances
//
// Thread Safety Note:
// heropods_global is not marked as `shared` because it would break compile-time
// reflection in paramsparser. The map operations are generally safe for concurrent
// read access. For write operations, the Redis backend provides the source of truth
// and synchronization. Each HeroPods instance has its own network_mutex for
// protecting network operations.
__global (
heropods_global map[string]&HeroPods
heropods_default string
@@ -31,9 +39,12 @@ pub fn new(args ArgsGet) !&HeroPods {
return get(name: args.name)!
}
// Get a HeroPods instance by name
// If fromdb is true, loads from Redis; otherwise returns from memory cache
pub fn get(args ArgsGet) !&HeroPods {
mut context := base.context()!
heropods_default = args.name
if args.fromdb || args.name !in heropods_global {
mut r := context.redis()!
if r.hexists('context:heropods', args.name)! {
@@ -52,15 +63,16 @@ pub fn get(args ArgsGet) !&HeroPods {
return error("HeroPods with name '${args.name}' does not exist")
}
}
return get(args)! // no longer from db nor create
return get(args)! // Recursive call with fromdb=false
}
return heropods_global[args.name] or {
print_backtrace()
return error('could not get config for heropods with name:${args.name}')
}
}
// register the config for the future
// Register a HeroPods instance (saves to both memory and Redis)
pub fn set(o HeroPods) ! {
mut o2 := set_in_mem(o)!
heropods_default = o2.name
@@ -69,13 +81,14 @@ pub fn set(o HeroPods) ! {
r.hset('context:heropods', o2.name, json.encode(o2))!
}
// does the config exists?
// Check if a HeroPods instance exists in Redis
pub fn exists(args ArgsGet) !bool {
mut context := base.context()!
mut r := context.redis()!
return r.hexists('context:heropods', args.name)!
}
// Delete a HeroPods instance from Redis (does not affect memory cache)
pub fn delete(args ArgsGet) ! {
mut context := base.context()!
mut r := context.redis()!
@@ -88,33 +101,36 @@ pub mut:
fromdb bool // will load from Redis (resets memory cache)
}
// if fromdb set: load from filesystem, and not from mem, will also reset what is in mem
// List all HeroPods instances
// If fromdb is true, loads from Redis and resets memory cache
// If fromdb is false, returns from memory cache
pub fn list(args ArgsList) ![]&HeroPods {
mut res := []&HeroPods{}
mut context := base.context()!
if args.fromdb {
// reset what is in mem
// Reset memory cache and load from Redis
heropods_global = map[string]&HeroPods{}
heropods_default = ''
}
if args.fromdb {
mut r := context.redis()!
mut l := r.hkeys('context:heropods')!
for name in l {
res << get(name: name, fromdb: true)!
}
return res
} else {
// load from memory
// Load from memory cache
for _, client in heropods_global {
res << client
}
}
return res
}
// only sets in mem, does not set as config
// Set a HeroPods instance in memory cache only (does not persist to Redis)
// Initializes the instance via obj_init before caching
fn set_in_mem(o HeroPods) !HeroPods {
mut o2 := obj_init(o)!
heropods_global[o2.name] = &o2
@@ -226,7 +242,11 @@ pub fn play(mut plbook PlayBook) ! {
}
}
// switch instance to be used for heropods
// Switch the default HeroPods instance
//
// Thread Safety Note:
// String assignment is atomic on most platforms, so no explicit locking is needed.
// If strict thread safety is required in the future, this could be wrapped in a lock.
pub fn switch(name string) {
heropods_default = name
}

View File

@@ -5,24 +5,31 @@ import incubaid.herolib.osal.core as osal
import incubaid.herolib.ui.console
import incubaid.herolib.virt.crun
import os
import sync
pub const version = '0.0.0'
const singleton = false
const default = true
// THIS IS THE SOURCE OF THE INFORMATION OF THIS FILE, HERE WE HAVE THE CONFIG OBJECT CONFIGURED AND MODELLED
// HeroPods factory for managing containers
//
// Thread Safety:
// The network_config field is protected by network_mutex for thread-safe concurrent access.
// We use a separate mutex instead of marking network_config as `shared` because V's
// compile-time reflection (used by paramsparser) cannot handle shared fields.
@[heap]
pub struct HeroPods {
pub mut:
tmux_session string // tmux session name
containers map[string]&Container // name -> container mapping
images map[string]&ContainerImage // name -> image mapping
crun_configs map[string]&crun.CrunConfig // name -> crun config mapping
base_dir string // base directory for all container data
reset bool // will reset the heropods
use_podman bool = true // will use podman for image management
name string // name of the heropods
tmux_session string // tmux session name
containers map[string]&Container // name -> container mapping
images map[string]&ContainerImage // name -> image mapping
crun_configs map[string]&crun.CrunConfig // name -> crun config mapping
base_dir string // base directory for all container data
reset bool // will reset the heropods
use_podman bool = true // will use podman for image management
name string // name of the heropods
network_config NetworkConfig @[skip; str: skip] // network configuration (automatically initialized, not serialized)
network_mutex sync.Mutex @[skip; str: skip] // protects network_config for thread-safe concurrent access
}
// your checking & initialization code if needed
@@ -46,22 +53,31 @@ fn obj_init(mycfg_ HeroPods) !HeroPods {
}
}
// Initialize HeroPods instance with network configuration
// Note: network_mutex is automatically initialized to zero value (unlocked state)
mut heropods := HeroPods{
tmux_session: args.name
containers: map[string]&Container{}
images: map[string]&ContainerImage{}
crun_configs: map[string]&crun.CrunConfig{}
base_dir: args.base_dir
reset: args.reset
use_podman: args.use_podman
name: args.name
tmux_session: args.name
containers: map[string]&Container{}
images: map[string]&ContainerImage{}
crun_configs: map[string]&crun.CrunConfig{}
base_dir: args.base_dir
reset: args.reset
use_podman: args.use_podman
name: args.name
network_config: NetworkConfig{
allocated_ips: map[string]string{}
}
}
// Clean up any leftover crun state if reset is requested
if args.reset {
heropods.cleanup_crun_state()!
heropods.network_cleanup_all(false)! // Keep bridge for reuse
}
// Initialize network layer
heropods.network_init()!
// Load existing images into cache
heropods.load_existing_images()!
@@ -70,7 +86,7 @@ fn obj_init(mycfg_ HeroPods) !HeroPods {
heropods.setup_default_images(args.reset)!
}
return args
return heropods
}
/////////////NORMALLY NO NEED TO TOUCH
@@ -92,7 +108,7 @@ fn (mut self HeroPods) setup_default_images(reset bool) ! {
}
if img.str() !in self.images || reset {
console.print_debug('Preparing default image: ${img.str()}')
_ = self.image_new(args)!
self.image_new(args)!
}
}
}

View File

@@ -1,5 +0,0 @@
- use builder... for remote execution inside the container
- make an executor like we have for SSH but then for the container, so we can use this to execute commands inside the container
-

384
lib/virt/heropods/network.v Normal file
View File

@@ -0,0 +1,384 @@
module heropods
import incubaid.herolib.osal.core as osal
import incubaid.herolib.ui.console
import os
import crypto.sha256
// Network configuration for HeroPods
//
// This module provides container networking similar to Docker/Podman:
// - Bridge networking with automatic IP allocation
// - NAT for outbound internet access
// - DNS configuration
// - veth pair management
//
// Thread Safety:
// All network_config operations are protected by HeroPods.network_mutex.
// The struct is not marked as `shared` to maintain compatibility with
// paramsparser's compile-time reflection.
//
// Future extension possibilities:
// - IPv6 support
// - Custom per-container DNS servers
// - iptables isolation (firewall per container)
// - Multiple bridges for isolated networks
// - Port forwarding/mapping
// - Network policies and traffic shaping
// NetworkConfig holds network configuration for HeroPods containers
//
// Defaults describe a single /24 bridge network with the gateway assigned to
// the bridge itself. allocated_ips maps container names to their assigned
// addresses; freed_ip_pool recycles offsets from deleted containers before
// next_ip_offset is advanced (see the allocation strategy comments below).
//
// Thread Safety: all access goes through HeroPods.network_mutex (see module
// header); the struct itself carries no locking.
struct NetworkConfig {
mut:
	bridge_name string = 'heropods0'
	subnet string = '10.10.0.0/24'
	gateway_ip string = '10.10.0.1'
	dns_servers []string = ['8.8.8.8', '8.8.4.4']
	allocated_ips map[string]string // container_name -> IP address
	freed_ip_pool []int // Pool of freed IP offsets for reuse (e.g., [15, 23, 42])
	next_ip_offset int = 10 // Start allocating from 10.10.0.10 (only used when pool is empty)
}
// Initialize network configuration in HeroPods factory
//
// Idempotent: delegates to network_setup_bridge(), which returns early when
// the host bridge already exists.
fn (mut self HeroPods) network_init() ! {
	console.print_debug('Initializing HeroPods network layer...')
	// Setup host bridge if it doesn't exist
	self.network_setup_bridge()!
	console.print_debug('HeroPods network layer initialized')
}
// Setup the host bridge network (one-time setup, idempotent)
//
// Steps:
// 1. Validates the configured subnet is in CIDR notation
// 2. Returns early if the bridge already exists
// 3. Creates the bridge, assigns the gateway IP, brings it up
// 4. Enables IPv4 forwarding (warning only on failure)
// 5. Installs a MASQUERADE NAT rule for the subnet (warning only on failure)
//
// Bridge creation failures are returned as errors; forwarding/NAT failures
// only emit warnings so containers can still run without internet access.
fn (mut self HeroPods) network_setup_bridge() ! {
	bridge_name := self.network_config.bridge_name
	subnet := self.network_config.subnet
	// Validate CIDR notation before indexing into the split result,
	// otherwise a malformed subnet would cause an out-of-bounds panic
	subnet_parts := subnet.split('/')
	if subnet_parts.len != 2 {
		return error('Invalid subnet "${subnet}": expected CIDR notation like 10.10.0.0/24')
	}
	gateway_ip := '${self.network_config.gateway_ip}/${subnet_parts[1]}'
	// Check if bridge already exists
	result := osal.exec(
		cmd: 'ip link show ${bridge_name}'
		stdout: false
		raise_error: false
	) or {
		osal.Job{
			exit_code: 1
		}
	}
	if result.exit_code == 0 {
		console.print_debug('Bridge ${bridge_name} already exists')
		return
	}
	console.print_debug('Creating bridge ${bridge_name}...')
	// Create bridge
	osal.exec(
		cmd: 'ip link add name ${bridge_name} type bridge'
		stdout: false
	)!
	// Assign IP to bridge
	osal.exec(
		cmd: 'ip addr add ${gateway_ip} dev ${bridge_name}'
		stdout: false
	)!
	// Bring bridge up
	osal.exec(
		cmd: 'ip link set ${bridge_name} up'
		stdout: false
	)!
	// Enable IP forwarding (with error resilience)
	osal.exec(
		cmd: 'sysctl -w net.ipv4.ip_forward=1'
		stdout: false
	) or {
		console.print_stderr('Warning: Failed to enable IPv4 forwarding. Containers may not have internet access.')
		console.print_debug('You may need to run: sudo sysctl -w net.ipv4.ip_forward=1')
	}
	// Get primary network interface for NAT
	primary_iface := self.network_get_primary_interface() or {
		console.print_stderr('Warning: Could not detect primary network interface. NAT may not work.')
		'eth0' // fallback
	}
	// Setup NAT for outbound traffic (with error resilience)
	// -C checks for an existing rule so the append (-A) stays idempotent
	console.print_debug('Setting up NAT rules for ${primary_iface}...')
	osal.exec(
		cmd: 'iptables -t nat -C POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE 2>/dev/null || iptables -t nat -A POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE'
		stdout: false
	) or {
		console.print_stderr('Warning: Failed to setup NAT rules. Containers may not have internet access.')
		console.print_debug('You may need to run: sudo iptables -t nat -A POSTROUTING -s ${subnet} -o ${primary_iface} -j MASQUERADE')
	}
	console.print_green('Bridge ${bridge_name} created and configured')
}
// Get the primary network interface for NAT
//
// Reads the host's default route via `ip route` and returns the interface
// name (5th field of the first "default" line). Returns an error when no
// default route exists or the command produced no output.
fn (self HeroPods) network_get_primary_interface() !string {
	// Query the kernel routing table for the default route's interface
	job := osal.exec(
		cmd: "ip route | grep default | awk '{print \$5}' | head -n1"
		stdout: false
	)!
	interface_name := job.output.trim_space()
	if interface_name.len == 0 {
		return error('Could not determine primary network interface')
	}
	return interface_name
}
// Allocate an IP address for a container (thread-safe)
//
// IP REUSE STRATEGY:
// 1. First, try to reuse an IP from the freed_ip_pool (recycled IPs from deleted containers)
// 2. If pool is empty, allocate a new IP by incrementing next_ip_offset
// 3. This prevents IP exhaustion in a /24 subnet (254 usable IPs)
//
// Thread Safety:
// This function uses network_mutex to ensure atomic IP allocation.
// Multiple concurrent container starts will be serialized at the IP allocation step,
// preventing race conditions where two containers could receive the same IP.
//
// Fix: the subnet-exhaustion check now runs BEFORE the counter is consumed,
// so a failed allocation mutates no state and logs no misleading
// "Allocated new IP offset" line (previously the check ran after both).
fn (mut self HeroPods) network_allocate_ip(container_name string) !string {
	self.network_mutex.@lock()
	defer {
		self.network_mutex.unlock()
	}
	// Check if already allocated (idempotent: repeated calls return the same IP)
	if container_name in self.network_config.allocated_ips {
		return self.network_config.allocated_ips[container_name]
	}
	// Extract base IP from subnet (e.g., "10.10.0.0/24" -> "10.10.0")
	subnet_parts := self.network_config.subnet.split('/')
	base_ip_parts := subnet_parts[0].split('.')
	base_ip := '${base_ip_parts[0]}.${base_ip_parts[1]}.${base_ip_parts[2]}'
	// Determine IP offset: reuse from pool first, then increment
	mut ip_offset := 0
	if self.network_config.freed_ip_pool.len > 0 {
		// Reuse a freed IP from the pool (LIFO - pop from end)
		ip_offset = self.network_config.freed_ip_pool.last()
		self.network_config.freed_ip_pool.delete_last()
		console.print_debug('Reusing IP offset ${ip_offset} from freed pool (pool size: ${self.network_config.freed_ip_pool.len})')
	} else {
		// No freed IPs available, allocate a new one.
		// Check the subnet limit (254 usable IPs in /24) before touching
		// the counter so exhaustion leaves allocation state unchanged.
		if self.network_config.next_ip_offset > 254 {
			return error('IP address pool exhausted: subnet ${self.network_config.subnet} has no more available IPs. Consider using a larger subnet or multiple bridges.')
		}
		// This increment is atomic within the mutex lock
		ip_offset = self.network_config.next_ip_offset
		self.network_config.next_ip_offset++
		console.print_debug('Allocated new IP offset ${ip_offset} (next: ${self.network_config.next_ip_offset})')
	}
	// Build the full IP address and record the allocation
	ip := '${base_ip}.${ip_offset}'
	self.network_config.allocated_ips[container_name] = ip
	console.print_debug('Allocated IP ${ip} to container ${container_name}')
	return ip
}
// Setup network for a container (creates veth pair, assigns IP, configures routing)
//
// Steps (order matters — each step depends on the previous one):
//   1. Allocate (or look up) the container's IP via network_allocate_ip
//   2. Create a veth pair named from a sha256 hash of the container name
//   3. Attach the bridge end to the host bridge and bring it up
//   4. Move the container end into the container's netns, addressed by PID
//   5. Inside the namespace (via nsenter): rename the interface to eth0,
//      assign the IP, bring it up, and add the default route via the gateway
//
// NOTE(review): if a step fails partway through, the veth pair and the
// allocated IP are left behind; cleanup is expected to happen later via
// network_cleanup_container — confirm callers invoke it on failure.
fn (mut self HeroPods) network_setup_container(container_name string, container_pid int) ! {
	console.print_debug('Setting up network for container ${container_name} (PID: ${container_pid})...')
	// Allocate IP address (thread-safe)
	container_ip := self.network_allocate_ip(container_name)!
	bridge_name := self.network_config.bridge_name
	subnet_mask := self.network_config.subnet.split('/')[1]
	gateway_ip := self.network_config.gateway_ip
	// Create veth pair with unique names using hash to avoid collisions.
	// Linux interface names are limited to 15 chars, so a 6-char hex hash
	// suffix keeps names short while staying unique per container name.
	short_hash := sha256.hexhash(container_name)[..6]
	veth_container_short := 'veth-${short_hash}'
	veth_bridge_short := 'vbr-${short_hash}'
	// Delete veth pair if it already exists (cleanup from previous run);
	// errors are ignored because the interfaces usually do not exist
	osal.exec(cmd: 'ip link delete ${veth_container_short} 2>/dev/null', stdout: false) or {}
	osal.exec(cmd: 'ip link delete ${veth_bridge_short} 2>/dev/null', stdout: false) or {}
	// Create veth pair
	console.print_debug('Creating veth pair: ${veth_container_short} <-> ${veth_bridge_short}')
	osal.exec(
		cmd: 'ip link add ${veth_container_short} type veth peer name ${veth_bridge_short}'
		stdout: false
	)!
	// Attach bridge end to bridge
	osal.exec(
		cmd: 'ip link set ${veth_bridge_short} master ${bridge_name}'
		stdout: false
	)!
	// Bring the bridge-side end up so traffic can flow once the container end is configured
	osal.exec(
		cmd: 'ip link set ${veth_bridge_short} up'
		stdout: false
	)!
	// Move container end into container's network namespace (identified by init PID)
	console.print_debug('Moving ${veth_container_short} into container namespace (PID: ${container_pid})')
	osal.exec(
		cmd: 'ip link set ${veth_container_short} netns ${container_pid}'
		stdout: false
	)!
	// Configure network inside container — all following commands run in the
	// container's network namespace via `nsenter -t <pid> -n`
	console.print_debug('Configuring network inside container: ${container_ip}/${subnet_mask}')
	// Rename veth to eth0 inside container for consistency
	osal.exec(
		cmd: 'nsenter -t ${container_pid} -n ip link set ${veth_container_short} name eth0'
		stdout: false
	)!
	// Assign IP address
	osal.exec(
		cmd: 'nsenter -t ${container_pid} -n ip addr add ${container_ip}/${subnet_mask} dev eth0'
		stdout: false
	)!
	// Bring interface up
	osal.exec(
		cmd: 'nsenter -t ${container_pid} -n ip link set dev eth0 up'
		stdout: false
	)!
	// Add default route using gateway IP (the bridge address on the host)
	osal.exec(
		cmd: 'nsenter -t ${container_pid} -n ip route add default via ${gateway_ip}'
		stdout: false
	)!
	console.print_green('Network configured for container ${container_name}: ${container_ip}')
}
// Configure DNS inside container by writing resolv.conf
//
// Writes one `nameserver <ip>` line per configured DNS server directly into
// the container rootfs at <rootfs>/etc/resolv.conf, creating /etc first if
// it does not exist.
fn (self HeroPods) network_configure_dns(container_name string, rootfs_path string) ! {
	console.print_debug('Configuring DNS for container ${container_name}...')
	etc_dir := '${rootfs_path}/etc'
	resolv_conf_path := '${rootfs_path}/etc/resolv.conf'
	// Make sure the target directory exists before writing into it
	if !os.exists(etc_dir) {
		os.mkdir_all(etc_dir)!
	}
	// One nameserver line per configured server, newline-terminated
	dns_content := self.network_config.dns_servers.map('nameserver ${it}').join('\n') + '\n'
	os.write_file(resolv_conf_path, dns_content)!
	dns_servers_str := self.network_config.dns_servers.join(', ')
	console.print_debug('DNS configured: ${dns_servers_str}')
}
// Cleanup network for a container (removes veth pair and deallocates IP)
//
// Deleting the bridge-side end of the veth pair is sufficient: removing one
// end of a veth pair removes its peer too. The interface name is derived
// from the same sha256 hash used during setup.
//
// Thread Safety:
// IP deallocation is protected by network_mutex to prevent race conditions
// when multiple containers are being deleted concurrently.
fn (mut self HeroPods) network_cleanup_container(container_name string) ! {
	console.print_debug('Cleaning up network for container ${container_name}...')
	// Remove the bridge-side veth (best effort; it may already be gone)
	name_hash := sha256.hexhash(container_name)[..6]
	bridge_veth := 'vbr-${name_hash}'
	osal.exec(
		cmd: 'ip link delete ${bridge_veth} 2>/dev/null'
		stdout: false
	) or { console.print_debug('veth interface ${bridge_veth} already removed') }
	// Return the container's IP to the freed pool for reuse (thread-safe)
	self.network_mutex.@lock()
	defer {
		self.network_mutex.unlock()
	}
	if container_name !in self.network_config.allocated_ips {
		return
	}
	released_ip := self.network_config.allocated_ips[container_name]
	// The last octet of the address is the allocation offset (e.g., "10.10.0.42" -> 42)
	octets := released_ip.split('.')
	if octets.len == 4 {
		offset := octets[3].int()
		// Avoid duplicate entries in the reuse pool
		if offset !in self.network_config.freed_ip_pool {
			self.network_config.freed_ip_pool << offset
			console.print_debug('Returned IP offset ${offset} to freed pool (pool size: ${self.network_config.freed_ip_pool.len})')
		}
	}
	// Drop the allocation record
	self.network_config.allocated_ips.delete(container_name)
	console.print_debug('Deallocated IP ${released_ip} from container ${container_name}')
}
// Cleanup all network resources (called on reset)
//
// Parameters:
// - full: if true, also removes the bridge (for complete teardown)
//         if false, keeps the bridge for reuse (default)
//
// Thread Safety:
// A snapshot of container names is taken under the lock; the per-container
// cleanup loop then runs unlocked (each network_cleanup_container call
// acquires the mutex itself), and the final state reset re-acquires it.
fn (mut self HeroPods) network_cleanup_all(full bool) ! {
	console.print_debug('Cleaning up all HeroPods network resources (full=${full})...')
	// Snapshot the container names under the lock
	self.network_mutex.@lock()
	names := self.network_config.allocated_ips.keys()
	self.network_mutex.unlock()
	// Tear down each container's veth/IP; failures are logged, not fatal
	for name in names {
		self.network_cleanup_container(name) or {
			console.print_debug('Failed to cleanup network for ${name}: ${err}')
		}
	}
	// Reset all allocation state under the lock
	self.network_mutex.@lock()
	self.network_config.allocated_ips.clear()
	self.network_config.freed_ip_pool.clear()
	self.network_config.next_ip_offset = 10
	self.network_mutex.unlock()
	console.print_debug('Cleared IP allocations and freed pool')
	// Optionally remove the bridge for full cleanup
	if full {
		bridge := self.network_config.bridge_name
		console.print_debug('Removing bridge ${bridge}...')
		osal.exec(
			cmd: 'ip link delete ${bridge}'
			stdout: false
		) or { console.print_debug('Bridge ${bridge} already removed or does not exist') }
	}
	console.print_debug('Network cleanup complete')
}

35
lib/virt/heropods/utils.v Normal file
View File

@@ -0,0 +1,35 @@
module heropods
// Validate container name to prevent shell injection and path traversal
//
// Security validation that ensures container names:
// - Are not empty and not too long (max 64 chars)
// - Contain only alphanumeric characters, dashes, and underscores
// - Don't start with dash or underscore
// - Don't contain path traversal sequences
//
// This is critical for preventing command injection attacks since container
// names are used in shell commands throughout the module.
fn validate_container_name(name string) ! {
	// Reject the empty string up front so the byte checks below are safe
	if name.len == 0 {
		return error('Container name cannot be empty')
	}
	if name.len > 64 {
		return error('Container name too long (max 64 characters)')
	}
	// Whitelist of allowed bytes: ASCII letters, digits, dash, underscore
	safe_chars := 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
	if !name.contains_only(safe_chars) {
		return error('Container name "${name}" contains invalid characters. Only alphanumeric characters, dashes, and underscores are allowed.')
	}
	// First character must be alphanumeric (name is non-empty here)
	first := name[0]
	if first == `-` || first == `_` {
		return error('Container name cannot start with dash or underscore')
	}
	// Prevent path traversal (redundant given the whitelist, but explicit for security)
	if name.contains('..') || name.contains('/') || name.contains('\\') {
		return error('Container name cannot contain path separators or ".."')
	}
}