mirror of
https://github.com/eth-act/ere.git
synced 2026-02-19 11:54:42 -05:00
Add 1 hour timeout for zisk proving (#245)
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4170,6 +4170,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.12",
|
||||
"tracing",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -79,6 +79,7 @@ tracing = "0.1.41"
|
||||
tracing-subscriber = "0.3.19"
|
||||
twirp = "0.9.1"
|
||||
twirp-build = "0.9.0"
|
||||
wait-timeout = "0.2.1"
|
||||
|
||||
# Airbender dependencies
|
||||
airbender_execution_utils = { git = "https://github.com/matter-labs/zksync-airbender", package = "execution_utils", tag = "v0.5.1" }
|
||||
|
||||
@@ -164,7 +164,8 @@ impl ServerContainer {
|
||||
.inherit_env("ZISK_SHARED_TABLES")
|
||||
.inherit_env("ZISK_MAX_STREAMS")
|
||||
.inherit_env("ZISK_NUMBER_THREADS_WITNESS")
|
||||
.inherit_env("ZISK_MAX_WITNESS_STORED"),
|
||||
.inherit_env("ZISK_MAX_WITNESS_STORED")
|
||||
.inherit_env("ZISK_PROVE_TIMEOUT_SEC"),
|
||||
_ => cmd,
|
||||
};
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ strum = { workspace = true, features = ["derive"] }
|
||||
tempfile.workspace = true
|
||||
thiserror.workspace = true
|
||||
tracing.workspace = true
|
||||
wait-timeout.workspace = true
|
||||
|
||||
# Local dependencies
|
||||
ere-compile-utils = { workspace = true, optional = true }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::{
|
||||
program::ZiskProgram,
|
||||
zkvm::sdk::{RomDigest, ZiskOptions, ZiskSdk, ZiskServer},
|
||||
zkvm::sdk::{RomDigest, START_SERVER_TIMEOUT, ZiskOptions, ZiskSdk, ZiskServer},
|
||||
};
|
||||
use anyhow::bail;
|
||||
use ere_zkvm_interface::zkvm::{
|
||||
@@ -46,7 +46,7 @@ impl EreZisk {
|
||||
|
||||
if server
|
||||
.as_ref()
|
||||
.is_none_or(|server| server.status().is_err())
|
||||
.is_none_or(|server| server.status(START_SERVER_TIMEOUT).is_err())
|
||||
{
|
||||
const MAX_RETRY: usize = 3;
|
||||
let mut retry = 0;
|
||||
|
||||
@@ -26,6 +26,9 @@ pub enum Error {
|
||||
#[error("Server crashed")]
|
||||
ServerCrashed,
|
||||
|
||||
#[error("Timeout waiting for server proving")]
|
||||
TimeoutWaitingServerProving,
|
||||
|
||||
#[error("Timeout waiting for server ready")]
|
||||
TimeoutWaitingServerReady,
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::{
|
||||
iter,
|
||||
net::{Ipv4Addr, TcpStream},
|
||||
path::{Path, PathBuf},
|
||||
process::{Child, Command},
|
||||
process::{Child, Command, Stdio},
|
||||
sync::OnceLock,
|
||||
thread,
|
||||
time::{Duration, Instant},
|
||||
@@ -15,6 +15,11 @@ use std::{
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
use tempfile::tempdir;
|
||||
use tracing::{error, info};
|
||||
use wait_timeout::ChildExt;
|
||||
|
||||
/// Timeout for waiting until the ZisK server is ready (2 minutes).
pub const START_SERVER_TIMEOUT: Duration = Duration::from_secs(2 * 60);
|
||||
/// Timeout for the `cargo-zisk prove-client shutdown` command to finish (30 seconds).
pub const SHUTDOWN_SERVER_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
/// Fallback proving timeout (1 hour), used when `ZISK_PROVE_TIMEOUT_SEC` is
/// unset or does not parse as a `u64` number of seconds.
pub const DEFAULT_PROVE_TIMEOUT: Duration = Duration::from_secs(60 * 60);
|
||||
|
||||
/// Merkle root of ROM trace generated by `cargo-zisk rom-setup`.
|
||||
/// Represented as four `u64` words.
pub type RomDigest = [u64; 4];
|
||||
@@ -334,16 +339,32 @@ pub struct ZiskServer {
|
||||
impl Drop for ZiskServer {
|
||||
fn drop(&mut self) {
|
||||
info!("Shutting down ZisK server");
|
||||
let result = Command::new("cargo-zisk")
|
||||
|
||||
let mut cmd = Command::new("cargo-zisk");
|
||||
let result = cmd
|
||||
.args(["prove-client", "shutdown"])
|
||||
.args(self.options.prove_client_args())
|
||||
.output();
|
||||
if result.is_err() || result.as_ref().is_ok_and(|output| !output.status.success()) {
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.and_then(
|
||||
|mut child| match child.wait_timeout(SHUTDOWN_SERVER_TIMEOUT)? {
|
||||
Some(_) => child.wait_with_output(),
|
||||
None => {
|
||||
child.kill().ok();
|
||||
Err(std::io::Error::other("shutdown command timed out"))
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
if result.as_ref().is_ok_and(|output| output.status.success()) {
|
||||
info!("Shutdown ZisK server");
|
||||
} else {
|
||||
error!(
|
||||
"Failed to shutdown ZisK server{}",
|
||||
"Failed to shutdown ZisK server: {}",
|
||||
result
|
||||
.map(|output| format!(": {}", String::from_utf8_lossy(&output.stderr)))
|
||||
.unwrap_or_default()
|
||||
.map(|output| String::from_utf8_lossy(&output.stderr).to_string())
|
||||
.unwrap_or_else(|err| err.to_string())
|
||||
);
|
||||
error!("Shutdown server child process and asm services manually...");
|
||||
let _ = self.child.kill();
|
||||
@@ -351,20 +372,34 @@ impl Drop for ZiskServer {
|
||||
shutdown_asm_service(23116);
|
||||
shutdown_asm_service(23117);
|
||||
remove_shm_files();
|
||||
} else {
|
||||
info!("Shutdown ZisK server");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ZiskServer {
|
||||
/// Get status of server.
|
||||
pub fn status(&self) -> Result<ZiskServerStatus, Error> {
|
||||
pub fn status(&self, timeout: Duration) -> Result<ZiskServerStatus, Error> {
|
||||
let mut cmd = Command::new("cargo-zisk");
|
||||
let output = cmd
|
||||
let mut child = cmd
|
||||
.args(["prove-client", "status"])
|
||||
.args(self.options.prove_client_args())
|
||||
.output()
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|err| CommonError::command(&cmd, err))?;
|
||||
|
||||
if child
|
||||
.wait_timeout(timeout)
|
||||
.map_err(|err| CommonError::command(&cmd, err))?
|
||||
.is_none()
|
||||
{
|
||||
// Timeout reached, kill the process
|
||||
child.kill().ok();
|
||||
return Err(Error::TimeoutWaitingServerReady);
|
||||
}
|
||||
|
||||
let output = child
|
||||
.wait_with_output()
|
||||
.map_err(|err| CommonError::command(&cmd, err))?;
|
||||
|
||||
if !output.status.success() {
|
||||
@@ -425,11 +460,20 @@ impl ZiskServer {
|
||||
))?;
|
||||
}
|
||||
|
||||
// Use `ZISK_PROVE_TIMEOUT_SEC` (seconds) if set and valid; otherwise default to 1 hour.
|
||||
let prove_timeout = env::var("ZISK_PROVE_TIMEOUT_SEC")
|
||||
.ok()
|
||||
.and_then(|timeout| timeout.parse::<u64>().ok())
|
||||
.map(Duration::from_secs)
|
||||
.unwrap_or(DEFAULT_PROVE_TIMEOUT);
|
||||
|
||||
// ZisK server will finish the `prove` requested above, then respond to the
|
||||
// following `status`. So if the following `status` succeeds, the proof
|
||||
// should also be ready.
|
||||
self.status().map_err(|err| {
|
||||
if err.to_string().contains("EOF") {
|
||||
self.status(prove_timeout).map_err(|err| {
|
||||
if matches!(err, Error::TimeoutWaitingServerReady) {
|
||||
Error::TimeoutWaitingServerProving
|
||||
} else if err.to_string().contains("EOF") {
|
||||
Error::ServerCrashed
|
||||
} else {
|
||||
err
|
||||
@@ -455,14 +499,16 @@ impl ZiskServer {
|
||||
|
||||
/// Wait until the server status is idle.
|
||||
fn wait_until_ready(&self) -> Result<(), Error> {
|
||||
const TIMEOUT: Duration = Duration::from_secs(120); // 2mins
|
||||
const INTERVAL: Duration = Duration::from_secs(1);
|
||||
|
||||
info!("Waiting until server is ready...");
|
||||
|
||||
let start = Instant::now();
|
||||
while !matches!(self.status(), Ok(ZiskServerStatus::Idle)) {
|
||||
if start.elapsed() > TIMEOUT {
|
||||
while !matches!(
|
||||
self.status(START_SERVER_TIMEOUT),
|
||||
Ok(ZiskServerStatus::Idle)
|
||||
) {
|
||||
if start.elapsed() > START_SERVER_TIMEOUT {
|
||||
return Err(Error::TimeoutWaitingServerReady);
|
||||
}
|
||||
thread::sleep(INTERVAL);
|
||||
|
||||
Reference in New Issue
Block a user