From b9dc8996f57dce9cb118968d92a980b96c1ecf4e Mon Sep 17 00:00:00 2001 From: Mahmoud-Emad Date: Fri, 28 Nov 2025 10:37:47 +0200 Subject: [PATCH] feat: Improve Ubuntu installation and SSH execution - Update example configuration comments - Refactor server rescue check to use file_exists - Add Ubuntu installation timeout and polling constants - Implement non-interactive installation script execution - Enhance SSH execution with argument parsing - Add check to skip reinstallation if Ubuntu is already installed - Copy SSH key to new system during installation - Poll for installation completion with progress updates - Use `node.exec` instead of `node.exec_interactive` - Use `execvp` correctly for shell execution - Recreate node connection after server reboot - Adjust SSH wait timeout to milliseconds --- examples/virt/hetzner/hetzner_example.hero | 39 ++--- examples/virt/hetzner/hetzner_kristof2.vsh | 3 +- lib/builder/bootstrapper.v | 4 +- lib/builder/executor_local.v | 5 +- lib/builder/executor_ssh.v | 7 +- lib/virt/hetznermanager/rescue.v | 191 +++++++++++++++++---- 6 files changed, 192 insertions(+), 57 deletions(-) diff --git a/examples/virt/hetzner/hetzner_example.hero b/examples/virt/hetzner/hetzner_example.hero index ac23e1a7..198b07d9 100755 --- a/examples/virt/hetzner/hetzner_example.hero +++ b/examples/virt/hetzner/hetzner_example.hero @@ -1,35 +1,34 @@ #!/usr/bin/env hero +// # Configure HetznerManager, replace with your own credentials, server id's and ssh key name and all other parameters -// !!hetznermanager.configure -// name:"main" -// user:"krist" -// whitelist:"2111181, 2392178, 2545053, 2542166, 2550508, 2550378,2550253" -// password:"wontsethere" -// sshkey:"kristof" +!!hetznermanager.configure + user:"user_name" + whitelist:"server_id" + password:"password" + sshkey:"ssh_key_name" - -// !!hetznermanager.server_rescue -// server_name: 'kristof21' // The name of the server to manage (or use `id`) -// wait: true // Wait for the operation to complete -// hero_install: true // Automatically install Herolib in the rescue system +!!hetznermanager.server_rescue + server_name: 'server_name' // The name of the server to manage (or use `id`) + wait: true // Wait for the operation to complete + hero_install: true // Automatically install Herolib in the rescue system // # Reset a server -// !!hetznermanager.server_reset -// instance: 'main' -// server_name: 'your-server-name' -// wait: true +!!hetznermanager.server_reset + instance: 'main' + server_name: 'server_name' + wait: true // # Add a new SSH key to your Hetzner account -// !!hetznermanager.key_create -// instance: 'main' -// key_name: 'my-laptop-key' -// data: 'ssh-rsa AAAA...' +!!hetznermanager.key_create + instance: 'main' + key_name: 'ssh_key_name' + data: 'ssh-rsa AAAA...' // Install Ubuntu 24.04 on a server !!hetznermanager.ubuntu_install - server_name: 'kristof2' + server_name: 'server_name' wait: true hero_install: true // Install Herolib on the new OS diff --git a/examples/virt/hetzner/hetzner_kristof2.vsh b/examples/virt/hetzner/hetzner_kristof2.vsh index 9647b6ec..3df6077b 100755 --- a/examples/virt/hetzner/hetzner_kristof2.vsh +++ b/examples/virt/hetzner/hetzner_kristof2.vsh @@ -60,9 +60,8 @@ mut n := b.node_new(ipaddr: serverinfo.server_ip)! // this will put hero in debug mode on the system // n.hero_install(compile: true)! -n.shell('')! - cl.ubuntu_install(name: name, wait: true, hero_install: true)! +n.shell('')! // cl.ubuntu_install(name: 'kristof20', wait: true, hero_install: true)! // cl.ubuntu_install(id:2550378, name: 'kristof21', wait: true, hero_install: true)! // cl.ubuntu_install(id:2550508, name: 'kristof22', wait: true, hero_install: true)! diff --git a/lib/builder/bootstrapper.v b/lib/builder/bootstrapper.v index 46ebaa6f..a027d492 100644 --- a/lib/builder/bootstrapper.v +++ b/lib/builder/bootstrapper.v @@ -67,7 +67,9 @@ pub fn (mut node Node) hero_install(args HeroInstallArgs) ! { todo << 'bash /tmp/install_v.sh --herolib ' } } - node.exec_interactive(todo.join('\n'))! + // Use exec instead of exec_interactive since user interaction is not needed + // exec_interactive uses shell mode which replaces the process and never returns + node.exec(cmd: todo.join('\n'), stdout: true)! } @[params] diff --git a/lib/builder/executor_local.v b/lib/builder/executor_local.v index f1f513cf..8b63b208 100644 --- a/lib/builder/executor_local.v +++ b/lib/builder/executor_local.v @@ -99,8 +99,11 @@ pub fn (mut executor ExecutorLocal) download(args SyncArgs) ! { } pub fn (mut executor ExecutorLocal) shell(cmd string) ! { + // Note: os.execvp replaces the current process and never returns. + // This is intentional - shell() is designed to hand over control to the shell. + // Do not put shell() before any other code that needs to execute. if cmd.len > 0 { - os.execvp('/bin/bash', ["-c '${cmd}'"])! + os.execvp('/bin/bash', ['-c', cmd])! } else { os.execvp('/bin/bash', [])! } diff --git a/lib/builder/executor_ssh.v b/lib/builder/executor_ssh.v index ce767f2f..d503415b 100644 --- a/lib/builder/executor_ssh.v +++ b/lib/builder/executor_ssh.v @@ -235,11 +235,12 @@ pub fn (mut executor ExecutorSSH) info() map[string]string { // forwarding ssh traffic to certain container pub fn (mut executor ExecutorSSH) shell(cmd string) ! { + mut args := ['-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null', + '${executor.user}@${executor.ipaddr.addr}', '-p', '${executor.ipaddr.port}'] if cmd.len > 0 { - panic('TODO IMPLEMENT SHELL EXEC OVER SSH') + args << cmd } - os.execvp('ssh', ['-o StrictHostKeyChecking=no', '${executor.user}@${executor.ipaddr.addr}', - '-p ${executor.ipaddr.port}'])! + os.execvp('ssh', args)! } pub fn (mut executor ExecutorSSH) list(path string) ![]string { diff --git a/lib/virt/hetznermanager/rescue.v b/lib/virt/hetznermanager/rescue.v index ae8a0a0a..44d068d6 100644 --- a/lib/virt/hetznermanager/rescue.v +++ b/lib/virt/hetznermanager/rescue.v @@ -1,12 +1,16 @@ module hetznermanager -import incubaid.herolib.core.texttools import time import incubaid.herolib.ui.console import incubaid.herolib.osal.core as osal import incubaid.herolib.builder import os +// Ubuntu installation timeout constants +const install_timeout_seconds = 600 // 10 minutes max for installation +const install_poll_interval_seconds = 5 // Check installation status every 5 seconds +const install_progress_interval = 6 // Show progress every 6 polls (30 seconds) + // ///////////////////////////RESCUE pub struct RescueInfo { @@ -51,19 +55,29 @@ fn (mut h HetznerManager) server_rescue_internal(args_ ServerRescueArgs) !Server if serverinfo.rescue && !args.reset { if osal.ssh_test(address: serverinfo.server_ip, port: 22)! == .ok { - console.print_debug('test server ${serverinfo.server_name} is in rescue mode?') + console.print_debug('test server ${serverinfo.server_name} - checking if actually in rescue mode...') mut b := builder.new()! mut n := b.node_new(ipaddr: serverinfo.server_ip)! - res := n.exec(cmd: 'ls /root/.oldroot/nfs/install/installimage', stdout: false) or { - 'ERROR' - } - if res.contains('nfs/install/installimage') { + // Check if the server is actually in rescue mode using file_exists + if n.file_exists('/root/.oldroot/nfs/install/installimage') { console.print_debug('server ${serverinfo.server_name} is in rescue mode') return serverinfo } + + // Server is reachable but not in rescue mode - check if it's running Ubuntu + // This happens when the API reports rescue=true but the server already booted into the installed OS + if n.platform == .ubuntu { + console.print_debug('server ${serverinfo.server_name} is already running Ubuntu, not in rescue mode') + } else { + console.print_debug('server ${serverinfo.server_name} is running ${n.platform}, not in rescue mode') + } + // Server is not in rescue mode - the rescue flag in API is stale + serverinfo.rescue = false + } else { + // SSH not reachable - server might be rebooting or in unknown state + serverinfo.rescue = false } - serverinfo.rescue = false } // only do it if its not in rescue yet if serverinfo.rescue == false || args.reset { @@ -132,16 +146,48 @@ pub mut: hero_install bool hero_install_compile bool raid bool + install_timeout int = install_timeout_seconds // timeout in seconds for installation + reinstall bool // if true, always reinstall even if Ubuntu is already running } pub fn (mut h HetznerManager) ubuntu_install(args ServerInstallArgs) !&builder.Node { h.check_whitelist(name: args.name, id: args.id)! - mut serverinfo := h.server_rescue( + mut serverinfo := h.server_info_get(id: args.id, name: args.name)! + + // Check if Ubuntu is already installed and running (skip reinstallation unless forced) + if !args.reinstall { + if osal.ssh_test(address: serverinfo.server_ip, port: 22)! == .ok { + mut b := builder.new()! + mut n := b.node_new(ipaddr: serverinfo.server_ip)! + + // Check if server is running Ubuntu and NOT in rescue mode using Node's methods + is_rescue := n.file_exists('/root/.oldroot/nfs/install/installimage') + + if n.platform == .ubuntu && !is_rescue { + console.print_debug('server ${serverinfo.server_name} is already running Ubuntu, skipping installation') + + // Still install hero if requested + if args.hero_install { + n.exec_silent('apt update && apt install -y mc redis libpq5 libpq-dev')! + n.hero_install(compile: args.hero_install_compile)! + } + + return n + } + } + } + + // Server needs Ubuntu installation - go into rescue mode + serverinfo = h.server_rescue( id: args.id name: args.name wait: true )! + // Get the SSH key data to copy to the installed system + mykey := h.key_get(h.sshkey)! + ssh_pubkey := mykey.data + mut b := builder.new()! mut n := b.node_new(ipaddr: serverinfo.server_ip)! @@ -155,26 +201,106 @@ pub fn (mut h HetznerManager) ubuntu_install(args ServerInstallArgs) !&builder.N rstr = '-r yes -l 1 ' } + // Write the installation script to the server + // We run it with nohup in the background to avoid SSH timeout during long installations + install_script := '#!/bin/bash +set -e +echo "go into install mode, try to install ubuntu 24.04" + +# Cleanup any previous installation state +rm -f /tmp/install_complete /tmp/install_failed + +if [ -d /sys/firmware/efi ]; then + echo "UEFI system detected → need ESP" + PARTS="/boot/efi:esp:256M,swap:swap:4G,/boot:ext3:1024M,/:btrfs:all" +else + echo "BIOS/legacy system detected → no ESP" + PARTS="swap:swap:4G,/boot:ext3:1024M,/:btrfs:all" +fi + +# installimage invocation with error handling +if ! /root/.oldroot/nfs/install/installimage -a -n "${args.name}" ${rstr} -i /root/.oldroot/nfs/images/Ubuntu-2404-noble-amd64-base.tar.gz -f yes -t yes -p "\$PARTS"; then + echo "INSTALL_FAILED" > /tmp/install_failed + echo "installimage failed, check /root/debug.txt for details" + exit 1 +fi + +# Copy SSH key to the installed system before rebooting +# After installimage, the new system is mounted at /mnt +echo "Copying SSH key to installed system..." +mkdir -p /mnt/root/.ssh +chmod 700 /mnt/root/.ssh +echo "${ssh_pubkey}" > /mnt/root/.ssh/authorized_keys +chmod 600 /mnt/root/.ssh/authorized_keys +echo "SSH key copied successfully" + +# Mark installation as complete before rebooting +# sync to ensure marker file is written to disk before reboot +echo "INSTALL_COMPLETE" > /tmp/install_complete +sync + +reboot +' + + n.file_write('/tmp/ubuntu_install.sh', install_script)! + + // Start the installation in background using nohup to avoid SSH timeout + // The script will run independently of the SSH session n.exec( - cmd: ' - set -ex - echo "go into install mode, try to install ubuntu 24.04" - - if [ -d /sys/firmware/efi ]; then - echo "UEFI system detected → need ESP" - PARTS="/boot/efi:esp:256M,swap:swap:4G,/boot:ext3:1024M,/:btrfs:all" - else - echo "BIOS/legacy system detected → no ESP" - PARTS="swap:swap:4G,/boot:ext3:1024M,/:btrfs:all" - fi - - # installimage invocation - /root/.oldroot/nfs/install/installimage -a -n "${args.name}" ${rstr} -i /root/.oldroot/nfs/images/Ubuntu-2404-noble-amd64-base.tar.gz -f yes -t yes -p "\$PARTS" - - reboot - ' + cmd: 'chmod +x /tmp/ubuntu_install.sh && nohup /tmp/ubuntu_install.sh > /tmp/install.log 2>&1 &' + stdout: false )! + console.print_debug('Installation script started in background, waiting for completion...') + + // Poll for completion by checking if the marker file exists or if the server goes down (reboot) + max_iterations := args.install_timeout / install_poll_interval_seconds + mut install_complete := false + for i := 0; i < max_iterations; i++ { + time.sleep(install_poll_interval_seconds * time.second) + + // Check if server is still up and installation status + result := n.exec( + cmd: 'cat /tmp/install_failed 2>/dev/null && echo "FAILED" || (cat /tmp/install_complete 2>/dev/null || echo "NOT_COMPLETE")' + stdout: false + ) or { + // SSH connection failed - server might be rebooting after successful installation + console.print_debug('SSH connection lost - server is likely rebooting after installation') + install_complete = true + break + } + + // Check for installation failure + if result.contains('INSTALL_FAILED') || result.contains('FAILED') { + // Try to get error details from install log + error_log := n.exec( + cmd: 'tail -20 /tmp/install.log 2>/dev/null || cat /root/debug.txt 2>/dev/null || echo "No error details available"' + stdout: false + ) or { 'Could not retrieve error details' } + return error('Installation failed: ${error_log.trim_space()}') + } + + if result.contains('INSTALL_COMPLETE') { + console.print_debug('Installation complete, server should reboot soon') + install_complete = true + break + } + + // Show progress at configured interval + if i % install_progress_interval == 0 { + // Try to get the last line of the install log for progress + log_tail := n.exec( + cmd: 'tail -3 /tmp/install.log 2>/dev/null || echo "waiting..."' + stdout: false + ) or { 'waiting...' } + console.print_debug('Installation in progress: ${log_tail.trim_space()}') + } + } + + if !install_complete { + return error('Installation timed out after ${args.install_timeout} seconds') + } + os.execute_opt('ssh-keygen -R ${serverinfo.server_ip}')! console.print_debug('server ${serverinfo.server_name} is installed in ubuntu now, should be restarting.') @@ -187,15 +313,20 @@ pub fn (mut h HetznerManager) ubuntu_install(args ServerInstallArgs) !&builder.N console.print_debug('server ${serverinfo.server_name} is reacheable over ping, lets now try ssh.') - // wait 20 sec to make sure ssh is there - osal.ssh_wait(address: serverinfo.server_ip, timeout: 20)! + // wait 20 seconds to make sure ssh is there (timeout is in milliseconds) + osal.ssh_wait(address: serverinfo.server_ip, timeout: 20000)! console.print_debug('server ${serverinfo.server_name} is reacheable over ssh, lets now install hero if asked for.') + // Create a new node connection to the freshly installed Ubuntu system + // The old 'n' was connected to the rescue system which no longer exists after reboot + mut b2 := builder.new()! + mut n2 := b2.node_new(ipaddr: serverinfo.server_ip)! + if args.hero_install { - n.exec_silent('apt update && apt install -y mc redis libpq5 libpq-dev')! - n.hero_install(compile: args.hero_install_compile)! + n2.exec_silent('apt update && apt install -y mc redis libpq5 libpq-dev')! + n2.hero_install(compile: args.hero_install_compile)! } - return n + return n2 }