fixed cloud hypervisor issues + updated test script (working now)
@@ -8,6 +8,7 @@ use std::time::Duration;
 use sal_os;
 use sal_process;
+use crate::qcow2;
 
 /// Error type for Cloud Hypervisor operations
 #[derive(Debug)]
@@ -216,6 +217,45 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
     };
     let log_file = vm_log_path(id).to_string_lossy().into_owned();
 
+    // Ensure API socket directory exists and remove any stale socket file
+    let api_path = Path::new(&api_socket);
+    if let Some(parent) = api_path.parent() {
+        fs::create_dir_all(parent).map_err(|e| CloudHvError::IoError(e.to_string()))?;
+    }
+    // Best-effort removal of stale socket
+    let _ = fs::remove_file(&api_path);
+
+    // Preflight disk: if source is qcow2, convert to raw to avoid CH "Compressed blocks not supported"
+    // This is best-effort: if qemu-img is unavailable or info fails, we skip conversion.
+    let mut disk_to_use = rec.spec.disk_path.clone();
+    if let Ok(info) = qcow2::info(&disk_to_use) {
+        if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") {
+            let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned();
+            let cmd = format!(
+                "qemu-img convert -O raw {} {}",
+                shell_escape(&disk_to_use),
+                shell_escape(&dest)
+            );
+            match sal_process::run(&cmd).silent(true).execute() {
+                Ok(res) if res.success => {
+                    disk_to_use = dest;
+                }
+                Ok(res) => {
+                    return Err(CloudHvError::CommandFailed(format!(
+                        "Failed converting qcow2 to raw: {}",
+                        res.stderr
+                    )));
+                }
+                Err(e) => {
+                    return Err(CloudHvError::CommandFailed(format!(
+                        "Failed converting qcow2 to raw: {}",
+                        e
+                    )));
+                }
+            }
+        }
+    }
+
     // Build command (minimal args for Phase 2)
     // We redirect all output to log_file via shell and keep process in background with nohup
 
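For context, the preflight above leans on the crate's qcow2::info helper to detect the on-disk format. Below is a minimal standalone sketch of the same probe-then-convert idea, assuming qemu-img is on PATH and the serde_json crate is available; disk_format and ensure_raw are illustrative names, not SAL API:

use std::process::Command;

/// Probe an image's format via `qemu-img info --output=json`.
fn disk_format(path: &str) -> Option<String> {
    let out = Command::new("qemu-img")
        .args(["info", "--output=json", path])
        .output()
        .ok()?;
    if !out.status.success() {
        return None;
    }
    let v: serde_json::Value = serde_json::from_slice(&out.stdout).ok()?;
    Some(v.get("format")?.as_str()?.to_owned())
}

/// Convert to raw only when the probe positively identifies qcow2.
/// Unlike the commit (which returns an error when the conversion itself
/// fails), this sketch simply falls back to the source path.
fn ensure_raw(src: &str, dest: &str) -> String {
    if disk_format(src).as_deref() == Some("qcow2") {
        let converted = Command::new("qemu-img")
            .args(["convert", "-O", "raw", src, dest])
            .status()
            .map(|s| s.success())
            .unwrap_or(false);
        if converted {
            return dest.to_owned();
        }
    }
    src.to_owned()
}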
@@ -249,7 +289,7 @@ pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
     }
 
     parts.push("--disk".into());
-    parts.push(format!("path={}", rec.spec.disk_path));
+    parts.push(format!("path={}", disk_to_use));
     parts.push("--cpus".into());
     parts.push(format!("boot={}", rec.spec.vcpus));
     parts.push("--memory".into());
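This one-line change is the payoff of the preflight: the --disk argument now points at the converted image. As a hedged sketch of how such an argument vector assembles, the flag spellings (--disk path=..., --cpus boot=..., --memory size=...) follow cloud-hypervisor's CLI; ch_args, its parameters, and the size={}M formatting are assumptions, since the diff does not show the memory line's value:

/// Build a cloud-hypervisor argument list; joined and executed elsewhere.
fn ch_args(api_socket: &str, disk: &str, vcpus: u32, mem_mb: u64) -> Vec<String> {
    vec![
        "--api-socket".into(),
        api_socket.into(),
        "--disk".into(),
        format!("path={}", disk),    // raw image from the preflight, or the original
        "--cpus".into(),
        format!("boot={}", vcpus),   // boot CPU count
        "--memory".into(),
        format!("size={}M", mem_mb), // e.g. size=1024M
    ]
}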
@@ -342,20 +382,27 @@ pub fn vm_stop(id: &str, force: bool) -> Result<(), CloudHvError> {
         let _ = sal_process::run(&cmd).die(false).silent(true).execute();
     }
 
-    // Wait a bit for process to exit
+    // Wait for process to exit (up to ~10s)
     if let Some(pid) = rec.runtime.pid {
-        for _ in 0..20 {
+        for _ in 0..50 {
             if !proc_exists(pid) {
                 break;
             }
             thread::sleep(Duration::from_millis(200));
         }
-        // If still alive and force, kill -9
+        // If still alive and force, kill -9 and wait again (up to ~10s)
         if proc_exists(pid) && force {
+            // Send SIGKILL without extra shell layers; suppress errors/noise
             let _ = sal_process::run(&format!("kill -9 {}", pid))
                 .die(false)
                 .silent(true)
                 .execute();
+            for _ in 0..50 {
+                if !proc_exists(pid) {
+                    break;
+                }
+                thread::sleep(Duration::from_millis(200));
+            }
         }
     }
 
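The stop path now polls in 200 ms steps instead of giving up after a fixed 4 s (20 × 200 ms); 50 iterations is the ~10 s noted in the comments. The same pattern as a small self-contained helper, assuming a Linux /proc-based liveness check (proc_exists here is a stand-in for SAL's):

use std::{path::Path, thread, time::Duration};

/// Liveness check: on Linux, a running PID has a /proc/<pid> entry.
fn proc_exists(pid: u32) -> bool {
    Path::new(&format!("/proc/{}", pid)).exists()
}

/// Poll until the process exits or the timeout elapses; true if it exited.
fn wait_for_exit(pid: u32, timeout: Duration) -> bool {
    let deadline = std::time::Instant::now() + timeout;
    while std::time::Instant::now() < deadline {
        if !proc_exists(pid) {
            return true;
        }
        thread::sleep(Duration::from_millis(200));
    }
    false
}

Here wait_for_exit(pid, Duration::from_secs(10)) reproduces the 50 × 200 ms loop above.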
@@ -380,12 +427,22 @@ pub fn vm_delete(id: &str, delete_disks: bool) -> Result<(), CloudHvError> {
     let rec: VmRecord = serde_json::from_value(read_json(&p)?)
         .map_err(|e| CloudHvError::JsonError(e.to_string()))?;
 
-    // Refuse to delete if still running
+    // If it appears to be running, attempt a force stop first (best-effort)
     if let Some(pid) = rec.runtime.pid {
         if proc_exists(pid) {
-            return Err(CloudHvError::CommandFailed(
-                "VM appears to be running; stop it first".into(),
-            ));
+            let _ = vm_stop(id, true);
+            // Re-check original PID for liveness (up to ~5s)
+            for _ in 0..25 {
+                if !proc_exists(pid) {
+                    break;
+                }
+                thread::sleep(Duration::from_millis(200));
+            }
+            if proc_exists(pid) {
+                return Err(CloudHvError::CommandFailed(
+                    "VM appears to be running; stop it first".into(),
+                ));
+            }
         }
     }
 
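So vm_delete now degrades gracefully: best-effort force stop, a bounded re-check, and only then a hard error. The control flow in isolation, where proc_exists mirrors the /proc check sketched earlier and vm_stop is a stub standing in for SAL's real function:

use std::{path::Path, thread, time::Duration};

fn proc_exists(pid: u32) -> bool {
    Path::new(&format!("/proc/{}", pid)).exists()
}
fn vm_stop(_id: &str, _force: bool) -> Result<(), String> { Ok(()) }

/// Guard for the delete path: force stop, ~5s re-check, then refuse.
fn delete_guard(id: &str, pid: u32) -> Result<(), String> {
    if proc_exists(pid) {
        let _ = vm_stop(id, true); // best-effort; trust the re-check below
        for _ in 0..25 {
            if !proc_exists(pid) {
                break;
            }
            thread::sleep(Duration::from_millis(200));
        }
        if proc_exists(pid) {
            return Err("VM appears to be running; stop it first".into());
        }
    }
    Ok(())
}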
@@ -105,6 +105,41 @@ if !missing {
     print(`⚠️ VM start failed (this can happen if kernel/cmdline are incompatible): ${err}`);
 }
 
+print("\n waiting for VM to be ready...");
+
+// Discover API socket and PID from SAL
+let info1 = cloudhv_vm_info(vm_id);
+let api_sock = info1.spec.api_socket;
+let pid = info1.runtime.pid;
+
+// 1) Wait for API socket to appear (up to ~50s)
+let sock_ok = false;
+for x in 0..50 {
+    if exist(api_sock) { sock_ok = true; break; }
+    sleep(1);
+}
+print(`api_sock_exists=${sock_ok} path=${api_sock}`);
+
+// 2) Probe ch-remote info with retries (up to ~20s)
+if sock_ok {
+    let info_ok = false;
+    for x in 0..20 {
+        let r = run_silent(`ch-remote-static --api-socket ${api_sock} info`);
+        if r.success {
+            info_ok = true;
+            break;
+        }
+        sleep(1);
+    }
+    if info_ok {
+        print("VM API is ready (ch-remote info OK)");
+    } else {
+        print("⚠️ VM API did not become ready in time (continuing)");
+    }
+} else {
+    print("⚠️ API socket not found (continuing)");
+}
+
 print("\n--- Test 5: Stop VM (graceful) ---");
 try {
     cloudhv_vm_stop(vm_id, false);
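The test script's new readiness wait is two bounded loops: first the API socket file must exist, then `ch-remote ... info` must answer. The same two-phase probe as a Rust sketch, where the script's exist/run_silent become std calls; ch-remote-static is simply the binary name the script invokes (plain ch-remote takes the same --api-socket and info arguments):

use std::{path::Path, process::Command, thread, time::Duration};

/// Two-phase readiness probe: socket file appears, then the API answers.
fn wait_vm_ready(api_sock: &str) -> bool {
    // 1) Wait for the API socket file (up to ~50s)
    let sock_ok = (0..50).any(|_| {
        if Path::new(api_sock).exists() {
            true
        } else {
            thread::sleep(Duration::from_secs(1));
            false
        }
    });
    if !sock_ok {
        return false;
    }
    // 2) Retry `info` until the API responds (up to ~20s)
    (0..20).any(|_| {
        let ok = Command::new("ch-remote-static")
            .args(["--api-socket", api_sock, "info"])
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false);
        if !ok {
            thread::sleep(Duration::from_secs(1));
        }
        ok
    })
}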