(unstable) pushing WIP

This commit is contained in:
Maxime Van Hees
2025-08-25 15:25:00 +02:00
parent af89ef0149
commit 1bb731711b
4 changed files with 456 additions and 107 deletions

View File

@@ -43,6 +43,8 @@ pub struct VmSpec {
pub id: String,
/// Optional for firmware boot; required for direct kernel boot
pub kernel_path: Option<String>,
/// Optional initramfs image for direct kernel boot; passed via `--initramfs` only if the file exists
pub initramfs_path: Option<String>,
/// Optional for direct kernel boot; required for firmware boot
pub firmware_path: Option<String>,
/// Disk image path (qcow2 or raw)
@@ -228,38 +230,104 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
let _ = fs::remove_file(&api_path);
// Preflight disk: if source is qcow2, convert to raw to avoid CH "Compressed blocks not supported"
// This is best-effort: if qemu-img is unavailable or info fails, we skip conversion.
// Robust conversion:
// - Remove any stale destination
// - Try direct convert to destination file
// - On failure (e.g., byte-range lock issues), fallback to piping stdout into dd
let mut disk_to_use = rec.spec.disk_path.clone();
if let Ok(info) = qcow2::info(&disk_to_use) {
if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") {
let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned();
let cmd = format!(
// Best-effort remove stale target file to avoid locking errors
let _ = fs::remove_file(&dest);
// Attempt 1: normal qemu-img convert to dest file
let cmd1 = format!(
"qemu-img convert -O raw {} {}",
shell_escape(&disk_to_use),
shell_escape(&dest)
);
match sal_process::run(&cmd).silent(true).execute() {
Ok(res) if res.success => {
disk_to_use = dest;
let attempt1 = sal_process::run(&cmd1).silent(true).die(false).execute();
let mut converted_ok = false;
let mut err1: Option<String> = None;
if let Ok(res) = attempt1 {
if res.success {
converted_ok = true;
} else {
err1 = Some(format!("{}{}", res.stdout, res.stderr));
}
Ok(res) => {
return Err(CloudHvError::CommandFailed(format!(
"Failed converting qcow2 to raw: {}",
res.stderr
)));
}
Err(e) => {
return Err(CloudHvError::CommandFailed(format!(
"Failed converting qcow2 to raw: {}",
e
)));
} else if let Err(e) = attempt1 {
err1 = Some(e.to_string());
}
if !converted_ok {
// Attempt 2: pipe via stdout into dd (avoids qemu-img destination locking semantics on some FS)
let cmd2 = format!(
"#!/bin/bash -euo pipefail\nqemu-img convert -O raw {} - | dd of={} bs=4M status=none",
shell_escape(&disk_to_use),
shell_escape(&dest)
);
match sal_process::run(&cmd2).silent(true).die(false).execute() {
Ok(res) if res.success => {
converted_ok = true;
}
Ok(res) => {
let mut msg = String::from("Failed converting qcow2 to raw.");
if let Some(e1) = err1 {
msg.push_str(&format!("\nFirst attempt error:\n{}", e1));
}
msg.push_str(&format!("\nSecond attempt error:\n{}{}", res.stdout, res.stderr));
return Err(CloudHvError::CommandFailed(msg));
}
Err(e) => {
let mut msg = String::from("Failed converting qcow2 to raw.");
if let Some(e1) = err1 {
msg.push_str(&format!("\nFirst attempt error:\n{}", e1));
}
msg.push_str(&format!("\nSecond attempt error:\n{}", e));
return Err(CloudHvError::CommandFailed(msg));
}
}
}
if converted_ok {
disk_to_use = dest;
}
}
}
// Build command (minimal args for Phase 2)
// We redirect all output to log_file via shell and keep process in background with nohup
// Consolidate extra --disk occurrences from spec.extra_args into a single --disk (CH version requires variadic form)
// Collect disk value tokens provided by the user and strip them from extra args so we can render one '--disk' followed by multiple values.
let mut extra_disk_vals: Vec<String> = Vec::new();
let mut extra_args_sans_disks: Vec<String> = Vec::new();
if let Some(extra) = rec.spec.extra_args.clone() {
let mut i = 0usize;
while i < extra.len() {
let tok = extra[i].clone();
if tok == "--disk" {
if i + 1 < extra.len() {
extra_disk_vals.push(extra[i + 1].clone());
i += 2;
continue;
} else {
// dangling --disk without value; drop it
i += 1;
continue;
}
} else if let Some(rest) = tok.strip_prefix("--disk=") {
if !rest.is_empty() {
extra_disk_vals.push(rest.to_string());
}
i += 1;
continue;
}
// keep token
extra_args_sans_disks.push(tok);
i += 1;
}
}
// CH CLI flags (very common subset)
// --disk path=... uses virtio-blk by default
@@ -282,6 +350,12 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
.unwrap_or_else(|| "console=ttyS0 reboot=k panic=1".to_string());
parts.push("--kernel".into());
parts.push(kpath);
if let Some(initrd) = rec.spec.initramfs_path.clone() {
if Path::new(&initrd).exists() {
parts.push("--initramfs".into());
parts.push(initrd);
}
}
parts.push("--cmdline".into());
parts.push(cmdline);
} else {
@@ -292,6 +366,10 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
parts.push("--disk".into());
parts.push(format!("path={}", disk_to_use));
// Append any additional disk value tokens (from sanitized extra args) so CH sees a single '--disk' with multiple values
for dv in &extra_disk_vals {
parts.push(dv.clone());
}
parts.push("--cpus".into());
parts.push(format!("boot={}", rec.spec.vcpus));
parts.push("--memory".into());
@@ -301,36 +379,50 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
parts.push("--console".into());
parts.push("off".into());
// Networking prerequisites (bridge + NAT via nftables + dnsmasq DHCP)
// Defaults can be overridden via env:
// HERO_VIRT_BRIDGE_NAME, HERO_VIRT_BRIDGE_ADDR_CIDR, HERO_VIRT_SUBNET_CIDR, HERO_VIRT_DHCP_START, HERO_VIRT_DHCP_END
let bridge_name = std::env::var("HERO_VIRT_BRIDGE_NAME").unwrap_or_else(|_| "br-hero".into());
let bridge_addr_cidr = std::env::var("HERO_VIRT_BRIDGE_ADDR_CIDR").unwrap_or_else(|_| "172.30.0.1/24".into());
let subnet_cidr = std::env::var("HERO_VIRT_SUBNET_CIDR").unwrap_or_else(|_| "172.30.0.0/24".into());
let dhcp_start = std::env::var("HERO_VIRT_DHCP_START").unwrap_or_else(|_| "172.30.0.50".into());
let dhcp_end = std::env::var("HERO_VIRT_DHCP_END").unwrap_or_else(|_| "172.30.0.250".into());
// Determine if the user provided explicit network arguments (e.g. "--net", "tap=...,mac=...")
// If so, do NOT provision the default host networking or add a default NIC.
let has_user_net = rec
.spec
.extra_args
.as_ref()
.map(|v| v.iter().any(|tok| tok == "--net"))
.unwrap_or(false);
// Ensure host-side networking (requires root privileges / CAP_NET_ADMIN)
ensure_host_net_prereq_dnsmasq_nftables(
&bridge_name,
&bridge_addr_cidr,
&subnet_cidr,
&dhcp_start,
&dhcp_end,
)?;
if !has_user_net {
// Networking prerequisites (bridge + NAT via nftables + dnsmasq DHCP)
// Defaults can be overridden via env:
// HERO_VIRT_BRIDGE_NAME, HERO_VIRT_BRIDGE_ADDR_CIDR, HERO_VIRT_SUBNET_CIDR, HERO_VIRT_DHCP_START, HERO_VIRT_DHCP_END
let bridge_name = std::env::var("HERO_VIRT_BRIDGE_NAME").unwrap_or_else(|_| "br-hero".into());
let bridge_addr_cidr =
std::env::var("HERO_VIRT_BRIDGE_ADDR_CIDR").unwrap_or_else(|_| "172.30.0.1/24".into());
let subnet_cidr =
std::env::var("HERO_VIRT_SUBNET_CIDR").unwrap_or_else(|_| "172.30.0.0/24".into());
let dhcp_start =
std::env::var("HERO_VIRT_DHCP_START").unwrap_or_else(|_| "172.30.0.50".into());
let dhcp_end =
std::env::var("HERO_VIRT_DHCP_END").unwrap_or_else(|_| "172.30.0.250".into());
// Ensure a TAP device for this VM and attach to the bridge
let tap_name = ensure_tap_for_vm(&bridge_name, id)?;
// Stable locally-administered MAC derived from VM id
let mac = stable_mac_from_id(id);
// Ensure host-side networking (requires root privileges / CAP_NET_ADMIN)
ensure_host_net_prereq_dnsmasq_nftables(
&bridge_name,
&bridge_addr_cidr,
&subnet_cidr,
&dhcp_start,
&dhcp_end,
)?;
parts.push("--net".into());
parts.push(format!("tap={},mac={}", tap_name, mac));
// Ensure a TAP device for this VM and attach to the bridge
let tap_name = ensure_tap_for_vm(&bridge_name, id)?;
// Stable locally-administered MAC derived from VM id
let mac = stable_mac_from_id(id);
if let Some(extra) = rec.spec.extra_args.clone() {
for e in extra {
parts.push(e);
}
parts.push("--net".into());
parts.push(format!("tap={},mac={}", tap_name, mac));
}
// Append any user-provided extra args, sans any '--disk' we already consolidated
for e in extra_args_sans_disks {
parts.push(e);
}
let args_str = shell_join(&parts);
@@ -369,6 +461,32 @@ echo $! > '{}'
Err(_) => None,
};
// Quick health check: ensure process did not exit immediately due to CLI errors (e.g., duplicate flags)
if let Some(pid_num) = pid {
thread::sleep(Duration::from_millis(300));
if !proc_exists(pid_num) {
// Tail log to surface the error cause
let tail_cmd = format!("tail -n 200 {}", shell_escape(&log_file));
let tail = sal_process::run(&tail_cmd).die(false).silent(true).execute();
let mut log_snip = String::new();
if let Ok(res) = tail {
if res.success {
log_snip = res.stdout;
} else {
log_snip = format!("{}{}", res.stdout, res.stderr);
}
}
return Err(CloudHvError::CommandFailed(format!(
"cloud-hypervisor exited immediately after start. Log tail:\n{}",
log_snip
)));
}
} else {
return Err(CloudHvError::CommandFailed(
"failed to obtain cloud-hypervisor PID (start script did not write pid)".into(),
));
}
// Update state
rec.runtime.pid = pid;
rec.runtime.status = if pid.is_some() { "running".into() } else { "stopped".into() };

View File

@@ -17,6 +17,7 @@ fn hv_to_rhai<T>(r: Result<T, cloudhv::CloudHvError>) -> Result<T, Box<EvalAltRe
fn map_to_vmspec(spec: Map) -> Result<VmSpec, Box<EvalAltResult>> {
let id = must_get_string(&spec, "id")?;
let kernel_path = get_string(&spec, "kernel_path");
let initramfs_path = get_string(&spec, "initramfs_path");
let firmware_path = get_string(&spec, "firmware_path");
let disk_path = must_get_string(&spec, "disk_path")?;
let api_socket = get_string(&spec, "api_socket").unwrap_or_else(|| "".to_string());
@@ -28,6 +29,7 @@ fn map_to_vmspec(spec: Map) -> Result<VmSpec, Box<EvalAltResult>> {
Ok(VmSpec {
id,
kernel_path,
initramfs_path,
firmware_path,
disk_path,
api_socket,
@@ -46,6 +48,11 @@ fn vmspec_to_map(s: &VmSpec) -> Map {
} else {
m.insert("kernel_path".into(), Dynamic::UNIT);
}
if let Some(ir) = &s.initramfs_path {
m.insert("initramfs_path".into(), ir.clone().into());
} else {
m.insert("initramfs_path".into(), Dynamic::UNIT);
}
if let Some(fw) = &s.firmware_path {
m.insert("firmware_path".into(), fw.clone().into());
} else {

View File

@@ -140,25 +140,25 @@ if !missing {
print("⚠️ API socket not found (continuing)");
}
print("\n--- Test 5: Stop VM (graceful) ---");
try {
cloudhv_vm_stop(vm_id, false);
print("✓ VM stop invoked (graceful)");
} catch (err) {
print(`⚠️ VM stop failed: ${err}`);
}
// print("\n--- Test 5: Stop VM (graceful) ---");
// try {
// cloudhv_vm_stop(vm_id, false);
// print("✓ VM stop invoked (graceful)");
// } catch (err) {
// print(`⚠️ VM stop failed: ${err}`);
// }
} else {
print("\n⚠ Skipping start/stop because required inputs are missing.");
}
print("\n--- Test 6: Delete VM definition ---");
try {
cloudhv_vm_delete(vm_id, false);
print("✓ VM deleted");
} catch (err) {
print(`❌ VM delete failed: ${err}`);
print("=== CloudHV Tests Aborted ===");
exit();
}
// print("\n--- Test 6: Delete VM definition ---");
// try {
// cloudhv_vm_delete(vm_id, false);
// print("✓ VM deleted");
// } catch (err) {
// print(`❌ VM delete failed: ${err}`);
// print("=== CloudHV Tests Aborted ===");
// exit();
// }
print("\n=== Cloud Hypervisor Basic Tests Completed ===");

View File

@@ -16,7 +16,7 @@
print("=== CloudHV + cloud-init + host DHCP (dnsmasq) ===");
// ----------- User input -----------
let user_pubkey = "ssh-ed25519 REPLACE_WITH_YOUR_PUBLIC_KEY user@host";
let user_pubkey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFyZJCEsvRc0eitsOoq+ywC5Lmqejvk3hXMVbO0AxPrd maxime@maxime-arch";
// Optional: choose boot method. If firmware is present in common locations, it will be used.
// Otherwise, if kernel_path exists, direct kernel boot will be used.
@@ -31,6 +31,8 @@ let br_cidr = "192.168.127.1/24";
let br_ip = "192.168.127.1";
let tap = "tap0";
let mac = "02:00:00:00:00:10"; // locally administered MAC
// Deterministic IP for the VM (dnsmasq will pin this MAC to this IP)
let vm_static_ip = "192.168.127.100";
// Paths
let base_dir = "/tmp/virt_images";
@@ -81,8 +83,9 @@ print("✓ Dependencies look OK");
print("\n--- Ensuring Ubuntu 24.04 cloud image ---");
let base;
try {
// Adjust the size_gb as desired; this resizes the cloud image sparsely.
base = qcow2_build_ubuntu_24_04_base(base_dir, 10);
// Avoid resizing to prevent GPT backup-header mismatch that can break early boot on some kernels/firmware.
// Use 0 to keep the original image size; cloud-init/cloud-image tooling can grow the FS later if needed.
base = qcow2_build_ubuntu_24_04_base(base_dir, 0);
} catch (err) {
print(`❌ Failed to build/ensure base image: ${err}`);
exit();
@@ -93,55 +96,106 @@ print(`✓ Using base image: ${disk_path}`);
// ----------- Host networking (bridge + tap) -----------
print("\n--- Configuring host networking (bridge + tap) ---");
// Idempotent: create br0 if missing, assign its IP if not present, then bring the link up
run_silent(`sudo sh -lc 'ip link show ${bridge} >/dev/null 2>&1 || ip link add ${bridge} type bridge'`);
run_silent(`sudo sh -lc 'ip addr show dev ${bridge} | grep -q "${br_cidr}" || ip addr add ${br_cidr} dev ${bridge}'`);
run_silent(`sudo sh -lc 'ip link set ${bridge} up'`);
let net_script = `
sudo ip link show ${bridge} >/dev/null 2>&1 || sudo ip link add ${bridge} type bridge
ip addr show dev ${bridge} | grep -q "${br_cidr}" || sudo ip addr add ${br_cidr} dev ${bridge}
sudo ip link set ${bridge} up
// Idempotent: create tap and attach to bridge
run_silent(`sudo sh -lc 'ip link show ${tap} >/dev/null 2>&1 || ip tuntap add dev ${tap} mode tap'`);
run_silent(`sudo sh -lc 'bridge link | grep -q "${tap}" || ip link set ${tap} master ${bridge}'`);
run_silent(`sudo sh -lc 'ip link set ${tap} up'`);
# Remove any stale TAP to avoid "Resource busy" when CH configures it
if ip link show ${tap} >/dev/null 2>&1; then
sudo ip link set ${tap} down || true
sudo ip link del ${tap} || true
fi
`;
run_silent(net_script);
print(`✓ Bridge ${bridge} and tap ${tap} configured`);
print("Note: NO-CARRIER on a bridge/tap without a peer is normal; DHCP will work once the guest brings its interface up.");
// ----------- Start/ensure dnsmasq on br0 -----------
print("\n--- Ensuring dnsmasq serving DHCP on the bridge ---");
// Ensure log/lease directory exists before starting dnsmasq
run_silent(`mkdir -p ${base_dir}`);
// If an instance with our pid-file is running, keep it; otherwise start a new one bound to br0.
// Use --port=0 to avoid DNS port conflicts; we only need DHCP here.
let dns_state = run_silent(`bash -lc 'if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then echo RUNNING; else echo STOPPED; fi'`);
let dns_state = run_silent(`
if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then
echo RUNNING
elif pgrep -f "dnsmasq .*--interface=${bridge}" >/dev/null 2>&1; then
echo RUNNING
elif [ -f ${dnsmasq_log} ] && grep -q "sockets bound exclusively to interface ${bridge}" ${dnsmasq_log}; then
echo RUNNING
else
echo STOPPED
fi
`);
let need_start = true;
if dns_state.success && dns_state.stdout.trim() == "RUNNING" {
print("✓ dnsmasq already running (pid file present and alive)");
need_start = false;
} else {
// Clean stale files
run_silent(`bash -lc 'rm -f ${dnsmasq_pid} ${dnsmasq_lease}'`);
run_silent(`rm -f ${dnsmasq_pid} ${dnsmasq_lease}`);
}
if need_start {
let cmd = `
nohup sudo dnsmasq \
--port=0 \
--bind-interfaces \
--except-interface=lo \
--interface=${bridge} \
--dhcp-range=192.168.127.100,192.168.127.200,12h \
--dhcp-option=option:router,${br_ip} \
--dhcp-option=option:dns-server,1.1.1.1 \
--pid-file=${dnsmasq_pid} \
--dhcp-leasefile=${dnsmasq_lease} \
> ${dnsmasq_log} 2>&1 &
echo $! >/dev/null
`;
let r = run_silent(`bash -lc ${cmd.stringify()}`);
// Start dnsmasq detached and force a clean, self-contained configuration.
// - Use --conf-file=/dev/null to avoid system config conflicts
// - Log directly via --log-facility to capture early failures
// - Run under current privileges (herodo is invoked with sudo)
let r = run_silent(`
: > ${dnsmasq_log}
nohup dnsmasq \
--conf-file=/dev/null \
--log-facility=${dnsmasq_log} \
--log-dhcp \
--user=root \
--group=root \
--port=0 \
--bind-interfaces \
--except-interface=lo \
--interface=${bridge} \
--dhcp-range=192.168.127.100,192.168.127.200,12h \
--dhcp-option=option:router,${br_ip} \
--dhcp-option=option:dns-server,1.1.1.1 \
--dhcp-host=${mac},${vm_static_ip} \
--pid-file=${dnsmasq_pid} \
--dhcp-leasefile=${dnsmasq_lease} &
`);
if !r.success {
print(`❌ Failed to start dnsmasq. Check log: ${dnsmasq_log}`);
let t = run_silent(`
if [ -f ${dnsmasq_log} ]; then
tail -n 200 ${dnsmasq_log}
fi
`);
if t.success && t.stdout.trim() != "" { print(t.stdout); }
exit();
}
// Wait briefly for pid file
sleep(1);
let chk = run_silent(`bash -lc 'test -f ${dnsmasq_pid} && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1 && echo OK || echo FAIL'`);
if !(chk.success && chk.stdout.trim() == "OK") {
// Robust readiness: wait up to 10s for pidfile OR process OR log pattern
let ready = run_silent(`
for i in $(seq 1 10); do
if [ -f ${dnsmasq_pid} ] && ps -p $(cat ${dnsmasq_pid}) >/dev/null 2>&1; then
echo OK; exit 0
fi
if pgrep -f "dnsmasq .*--interface=${bridge}" >/dev/null 2>&1; then
echo OK; exit 0
fi
if [ -f ${dnsmasq_log} ] && grep -q "sockets bound exclusively to interface ${bridge}" ${dnsmasq_log}; then
echo OK; exit 0
fi
sleep 1
done
echo FAIL
`);
if !(ready.success && ready.stdout.contains("OK")) {
print(`❌ dnsmasq did not come up. See ${dnsmasq_log}`);
let t = run_silent(`
if [ -f ${dnsmasq_log} ]; then
tail -n 200 ${dnsmasq_log}
fi
`);
if t.success && t.stdout.trim() != "" { print(t.stdout); }
exit();
}
print("✓ dnsmasq started (DHCP on br0)");
@@ -149,7 +203,8 @@ if need_start {
// ----------- Build cloud-init NoCloud seed (user-data/meta-data) -----------
print("\n--- Building NoCloud seed (user-data, meta-data) ---");
run_silent(`bash -lc 'mkdir -p ${base_dir}'`);
run_silent(`mkdir -p ${base_dir}`);
run_silent(`chmod 1777 ${base_dir}`);
// Compose user-data and meta-data content
let ud = `#cloud-config
@@ -169,15 +224,41 @@ local-hostname: noblevm
`;
// Write files via heredoc
let wr1 = run_silent(`bash -lc "cat > ${user_data} <<'EOF'\n${ud}\nEOF"`);
let wr1 = run_silent(`
cat > ${user_data} <<'EOF'
${ud}
EOF
`);
if !wr1.success { print(`❌ Failed to write ${user_data}`); exit(); }
let wr2 = run_silent(`bash -lc "cat > ${meta_data} <<'EOF'\n${md}\nEOF"`);
let wr2 = run_silent(`
cat > ${meta_data} <<'EOF'
${md}
EOF
`);
if !wr2.success { print(`❌ Failed to write ${meta_data}`); exit(); }
// Provide cloud-init network-config to ensure the NIC with our MAC requests DHCP
let net_config = `${base_dir}/network-config`;
let nc = `version: 2
ethernets:
nic0:
match:
macaddress: ${mac}
set-name: eth0
renderer: networkd
dhcp4: true
`;
let wr3 = run_silent(`
cat > ${net_config} <<'EOF'
${nc}
EOF
`);
if !wr3.success { print(`❌ Failed to write ${net_config}`); exit(); }
// Build seed ISO (prefer cloud-localds)
let built = false;
if !(clds == () || clds == "") {
let r = run_silent(`bash -lc "sudo cloud-localds ${seed_iso} ${user_data} ${meta_data}"`);
let r = run_silent(`sudo cloud-localds --network-config ${net_config} ${seed_iso} ${user_data} ${meta_data}`);
if r.success {
built = true;
}
@@ -187,7 +268,7 @@ if !built {
print("❌ Neither cloud-localds nor genisoimage succeeded/available to build seed.iso");
exit();
}
let r2 = run_silent(`bash -lc "sudo genisoimage -output ${seed_iso} -volid cidata -joliet -rock ${user_data} ${meta_data}"`);
let r2 = run_silent(`sudo genisoimage -output ${seed_iso} -volid cidata -joliet -rock ${user_data} ${meta_data} ${net_config}`);
if !r2.success {
print("❌ genisoimage failed to create seed.iso");
exit();
@@ -228,9 +309,18 @@ if firmware_path != "" {
// ----------- Create and start VM -----------
print("\n--- Creating and starting VM ---");
let rid = run_silent("date +%s%N");
let suffix = if rid.success && rid.stdout != "" { rid.stdout.trim() } else { "100000" };
let suffix = if rid.success && rid.stdout.trim() != "" { rid.stdout.trim() } else { "100000" };
let vm_id = `noble_vm_${suffix}`;
// Use a unique TAP per run to avoid "Resource busy" conflicts.
// Keep name <= 15 chars (Linux IFNAMSIZ), e.g. "tap-abcdef".
let tn = run_silent("od -An -N3 -tx1 /dev/urandom | tr -d '[:space:]'");
if tn.success && tn.stdout.trim() != "" {
tap = `tap-${tn.stdout.trim()}`;
} else {
tap = "tap-abcd01";
}
let spec = #{
"id": vm_id,
"disk_path": disk_path,
@@ -260,6 +350,23 @@ try {
try {
cloudhv_vm_start(vm_id);
print("✓ VM start invoked");
// After CH creates/opens the TAP, attach it to the bridge to allow DHCP broadcast to reach dnsmasq on br0.
// Avoid racing with CH tap configuration: wait briefly, then attempt attach.
let post_net = `
# Give CH time to finish configuring tap to avoid EBUSY
sleep 1
for i in $(seq 1 30); do
if ip link show ${tap} >/dev/null 2>&1; then
# Enslave to bridge and ensure up; ignore errors (idempotent)
sudo ip link set ${tap} master ${bridge} 2>/dev/null || true
sudo ip link set ${tap} up 2>/dev/null || true
break
fi
sleep 1
done
`;
run_silent(post_net);
} catch (err) {
print(`❌ VM start failed: ${err}`);
exit();
@@ -268,9 +375,68 @@ try {
// ----------- Wait for DHCP lease and print access info -----------
print("\n--- Waiting for DHCP lease from dnsmasq ---");
let vm_ip = "";
// First try deterministic fixed IP via ping (dnsmasq pins MAC->IP)
for i in 0..60 {
// Use a plain command (no shell operators). Success indicates reachability.
let pr = run_silent(`ping -c1 -W1 -I ${bridge} ${vm_static_ip}`);
if pr.success {
vm_ip = vm_static_ip;
break;
}
sleep(1);
let lr = run_silent(`bash -lc "if [ -f ${dnsmasq_lease} ]; then awk '\\$2 ~ /${mac}/ {print \\$3}' ${dnsmasq_lease} | tail -n1; fi"`);
}
for i in 0..180 {
sleep(1);
// Discover and validate IPv4; prefer exact MAC match across common dnsmasq lease locations
let lr = run_silent(`
valid_ipv4() { echo "$1" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true; }
# Candidate lease files (add more if your distro uses a different path)
LEASE_FILES="${dnsmasq_lease} /var/lib/misc/dnsmasq.leases /var/lib/dnsmasq/dnsmasq.leases"
# Include any runtime leases under /run/dnsmasq if present
if ls /run/dnsmasq/*.leases >/dev/null 2>&1; then
LEASE_FILES="$LEASE_FILES $(ls /run/dnsmasq/*.leases 2>/dev/null)"
fi
# 1) Try to find by exact MAC across all known lease files
for f in $LEASE_FILES; do
[ -f "$f" ] || continue
ip="$(awk -v m="${mac}" '$2==m{ip=$3} END{if(ip!="") print ip}' "$f")"
if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi
done
# 2) Fallback: last IP in our br0 subnet across all lease files
for f in $LEASE_FILES; do
[ -f "$f" ] || continue
ip="$(awk '$3 ~ /^192\\.168\\.127\\./ {ip=$3} END{if(ip!="") print ip}' "$f")"
if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi
done
# 3) Fallback: SAL default subnet (172.30.0.0/24) across all lease files
for f in $LEASE_FILES; do
[ -f "$f" ] || continue
ip="$(awk '$3 ~ /^172\\.30\\.0\\./ {ip=$3} END{if(ip!="") print ip}' "$f")"
if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi
done
# 4) ARP gleaning on likely bridges (br0 first, then br-hero) for the known MAC
for dev in ${bridge} br-hero; do
if ip -o link show "$dev" >/dev/null 2>&1; then
ip="$(ip neigh show dev "$dev" | awk '$0 ~ /lladdr ${mac}/ {print $1}' | tail -n1)"
if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi
fi
done
# 5) As a last resort, ARP any 192.168.127.x seen on br0
if ip -o link show ${bridge} >/dev/null 2>&1; then
ip="$(ip neigh show dev ${bridge} | awk '$1 ~ /^192\\.168\\.127\\./ {print $1}' | tail -n1)"
if [ -n "$ip" ] && [ -n "$(valid_ipv4 "$ip")" ]; then echo "$ip"; exit 0; fi
fi
# No valid IP yet
true
`);
if lr.success {
let ip = lr.stdout.trim();
if ip != "" {
@@ -279,11 +445,75 @@ for i in 0..60 {
}
}
}
// Fallback: parse cloud-hypervisor console log for an IPv4 on our expected subnets
let info2 = cloudhv_vm_info(vm_id);
let log_path = info2.runtime.log_file;
if vm_ip == "" {
print("⚠️ Could not discover VM IP from leases yet. You can check leases and retry:");
print(` cat ${dnsmasq_lease}`);
let cp = run_silent(`
if [ -f ${log_path} ]; then
grep -Eo '([0-9]+\\.){3}[0-9]+' ${log_path} | grep -E '^(192\\.168\\.127|172\\.30\\.0)\\.' | tail -n1
fi
`);
if cp.success {
let ip2 = cp.stdout.trim();
if ip2 != "" {
vm_ip = ip2;
}
}
}
if vm_ip == "" {
// Actively populate ARP neighbor tables by sweeping likely subnets
run_silent(`
for ip in $(seq 100 200); do ping -c1 -W1 -I ${bridge} 192.168.127.$ip >/dev/null 2>&1 || true; done
if ip -o link show br-hero >/dev/null 2>&1; then
for ip in $(seq 50 250); do ping -c1 -W1 -I br-hero 172.30.0.$ip >/dev/null 2>&1 || true; done
fi
`);
// Re-check after ARP sweep using the same validated discovery logic
let lr2 = run_silent(`
get_ip_from_leases() {
f="$1"; prefix="$2";
if [ -f "$f" ]; then
awk -v pfx="$prefix" '$3 ~ ("^" pfx) {ip=$3} END{if(ip!="") print ip}' "$f"
fi
}
valid_ipv4() {
echo "$1" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true
}
cand="$(get_ip_from_leases ${dnsmasq_lease} "192.168.127.")"
if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi
cand="$(get_ip_from_leases /var/lib/misc/dnsmasq.leases "192.168.127.")"
if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi
cand="$(get_ip_from_leases /var/lib/misc/dnsmasq.leases "172.30.0.")"
if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi
cand="$(ip neigh show dev ${bridge} | awk '$0 ~ /lladdr ${mac}/ {print $1}' | tail -n1)"
if [ -n "$cand" ] && [ -n "$(valid_ipv4 "$cand")" ]; then echo "$cand"; exit 0; fi
true
`);
if lr2.success {
let ip2 = lr2.stdout.trim();
if ip2 != "" {
vm_ip = ip2;
}
}
}
/* Final sanity: ensure vm_ip is a valid IPv4 dotted-quad before printing */
let _chk = run_silent(`echo "${vm_ip}" | grep -Eo '^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$' || true`);
if !(_chk.success && _chk.stdout.trim() != "") { vm_ip = ""; }
if vm_ip == "" {
print("❌ Could not discover VM IP after 180 seconds.");
print("Diagnostics you can run now:");
print(` tail -n +1 ${dnsmasq_lease}`);
print(" cat /var/lib/misc/dnsmasq.leases | tail -n 5");
print(` ip neigh show dev ${bridge} | grep '${mac}' || true`);
print("Exiting without SSH command because the IP could not be determined.");
exit();
} else {
print(`✓ Lease acquired: ${vm_ip}`);
print("\nSSH command (key-only; default user 'ubuntu'):");
print(`ssh -o StrictHostKeyChecking=no ubuntu@${vm_ip}`);
}
print("\n--- VM access details ---");
@@ -293,13 +523,7 @@ print(`API socket: ${info.spec.api_socket}`);
print(`Console log: ${info.runtime.log_file}`);
print(`Bridge: ${bridge} at ${br_ip}, TAP: ${tap}, MAC: ${mac}`);
print(`Seed: ${seed_iso}`);
if vm_ip != "" {
print("\nSSH command (key-only; default user 'ubuntu'):");
print(`ssh -o StrictHostKeyChecking=no ubuntu@${vm_ip}`);
} else {
print("\nSSH command (replace <IP> after you see a lease):");
print(`ssh -o StrictHostKeyChecking=no ubuntu@<IP>`);
}
/* SSH command already printed above when lease was acquired */
print("\nCleanup hints (manual):");
print(`- Stop dnsmasq: sudo kill \$(cat ${dnsmasq_pid})`);