Fix Cloud Hypervisor issues and update the test script (now working)

This commit is contained in:
Maxime Van Hees
2025-08-21 13:32:03 +02:00
parent d735316b7f
commit aab2b6f128
2 changed files with 100 additions and 8 deletions

View File

@@ -8,6 +8,7 @@ use std::time::Duration;
use sal_os;
use sal_process;
use crate::qcow2;
/// Error type for Cloud Hypervisor operations
#[derive(Debug)]
@@ -216,6 +217,45 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
};
let log_file = vm_log_path(id).to_string_lossy().into_owned();
// Ensure API socket directory exists and remove any stale socket file
let api_path = Path::new(&api_socket);
if let Some(parent) = api_path.parent() {
fs::create_dir_all(parent).map_err(|e| CloudHvError::IoError(e.to_string()))?;
}
// Best-effort removal of stale socket
let _ = fs::remove_file(&api_path);
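// Preflight disk: if the source image is qcow2, convert it to raw to avoid the CH "Compressed blocks not supported" error.
// Inspection is best-effort: if qemu-img is unavailable or info fails, conversion is skipped; a conversion that is attempted and fails aborts the start with an error.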
let mut disk_to_use = rec.spec.disk_path.clone();
if let Ok(info) = qcow2::info(&disk_to_use) {
if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") {
let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned();
let cmd = format!(
"qemu-img convert -O raw {} {}",
shell_escape(&disk_to_use),
shell_escape(&dest)
);
match sal_process::run(&cmd).silent(true).execute() {
Ok(res) if res.success => {
disk_to_use = dest;
}
Ok(res) => {
return Err(CloudHvError::CommandFailed(format!(
"Failed converting qcow2 to raw: {}",
res.stderr
)));
}
Err(e) => {
return Err(CloudHvError::CommandFailed(format!(
"Failed converting qcow2 to raw: {}",
e
)));
}
}
}
}
// Build command (minimal args for Phase 2)
// We redirect all output to log_file via shell and keep process in background with nohup
@@ -249,7 +289,7 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
}
parts.push("--disk".into());
parts.push(format!("path={}", rec.spec.disk_path));
parts.push(format!("path={}", disk_to_use));
parts.push("--cpus".into());
parts.push(format!("boot={}", rec.spec.vcpus));
parts.push("--memory".into());
@@ -342,20 +382,27 @@ pub fn vm_stop(id: &str, force: bool) -> Result<(), CloudHvError> {
let _ = sal_process::run(&cmd).die(false).silent(true).execute();
}
// Wait a bit for process to exit
// Wait for process to exit (up to ~10s)
if let Some(pid) = rec.runtime.pid {
for _ in 0..20 {
for _ in 0..50 {
if !proc_exists(pid) {
break;
}
thread::sleep(Duration::from_millis(200));
}
// If still alive and force, kill -9
// If still alive and force, kill -9 and wait again (up to ~10s)
if proc_exists(pid) && force {
// Send SIGKILL without extra shell layers; suppress errors/noise
let _ = sal_process::run(&format!("kill -9 {}", pid))
.die(false)
.silent(true)
.execute();
for _ in 0..50 {
if !proc_exists(pid) {
break;
}
thread::sleep(Duration::from_millis(200));
}
}
}
@@ -380,12 +427,22 @@ pub fn vm_delete(id: &str, delete_disks: bool) -> Result<(), CloudHvError> {
let rec: VmRecord = serde_json::from_value(read_json(&p)?)
.map_err(|e| CloudHvError::JsonError(e.to_string()))?;
// Refuse to delete if still running
// If the VM appears to be running, attempt a force stop first (best-effort)
if let Some(pid) = rec.runtime.pid {
if proc_exists(pid) {
return Err(CloudHvError::CommandFailed(
"VM appears to be running; stop it first".into(),
));
let _ = vm_stop(id, true);
// Re-check original PID for liveness (up to ~5s)
for _ in 0..25 {
if !proc_exists(pid) {
break;
}
thread::sleep(Duration::from_millis(200));
}
if proc_exists(pid) {
return Err(CloudHvError::CommandFailed(
"VM appears to be running; stop it first".into(),
));
}
}
}

View File

@@ -105,6 +105,41 @@ if !missing {
print(`⚠️ VM start failed (this can happen if kernel/cmdline are incompatible): ${err}`);
}
print("\n waiting for VM to be ready...");
// Discover API socket and PID from SAL
let info1 = cloudhv_vm_info(vm_id);
let api_sock = info1.spec.api_socket;
let pid = info1.runtime.pid;
// 1) Wait for API socket to appear (up to ~50s)
let sock_ok = false;
for x in 0..50 {
if exist(api_sock) { sock_ok = true; break; }
sleep(1);
}
print(`api_sock_exists=${sock_ok} path=${api_sock}`);
// 2) Probe ch-remote info with retries (up to ~20s)
if sock_ok {
let info_ok = false;
for x in 0..20 {
let r = run_silent(`ch-remote-static --api-socket ${api_sock} info`);
if r.success {
info_ok = true;
break;
}
sleep(1);
}
if info_ok {
print("VM API is ready (ch-remote info OK)");
} else {
print("⚠️ VM API did not become ready in time (continuing)");
}
} else {
print("⚠️ API socket not found (continuing)");
}
print("\n--- Test 5: Stop VM (graceful) ---");
try {
cloudhv_vm_stop(vm_id, false);