From d95fe5d4eec9ef46d302c364607142a548757a5f Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Tue, 31 Dec 2024 15:53:38 -0500 Subject: [PATCH] sqld: disable checkpoint on primary conn create This commit changes our primary connection initialization code in two ways to achieve the ability to disable checkpointing the wal. 1) We ignore the initial checkpoint that we call directly into sqlite3 before we restore from bottomless. There is a fixme above that explains why we need this but to me right now its not totally clear why without digging deeper into the internals of bottomless. We should do this but for the moment this unblocks us and from the fixme comment it does not sound unsafe rather doing extra work potentially. 2) When bottomless needs to get the local change counter is creates a sqlite connection. When this connection drops it seems like it checkpoints the wal. I took a brief look at the `sqlite3_close` code and did not find anything obvious, I have been told in the past that sqlite3 likes to checkpoint at weird points so this could be one of those. For the moment, the temporary fix like above is to `std::mem::forget` the connection so that `Drop` never gets called and thus `sqlite3_close` never gets called. With both of these changes we now don't checkpoint the wal unless we hit the max size or interval (which for testing I have set very high). This changes are not enabled by default but must be enabled by setting the following env var: ``` LIBSQL_DISABLE_INIT_CHECKPOINTING=1 LIBSQL_BOTTOMLESS_DISABLE_INIT_CHECKPOINTING=1 ``` --- bottomless/src/replicator.rs | 8 ++++++++ libsql-server/src/namespace/configurator/helpers.rs | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index 180ca38576..6f318fd125 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -935,6 +935,14 @@ impl Replicator { })?; tracing::trace!("Local change counter: {change_counter}"); + // TODO: we shouldn't leak the connection here but for some reason when this connection get + // dropped it seems to checkpoint the database + if std::env::var("LIBSQL_BOTTOMLESS_DISABLE_INIT_CHECKPOINTING").is_ok() + || std::env::var("LIBSQL_DISABLE_INIT_CHECKPOINTING").is_ok() + { + std::mem::forget(conn); + } + Ok(change_counter.to_be_bytes()) } diff --git a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs index 081eeaef80..c84c963805 100644 --- a/libsql-server/src/namespace/configurator/helpers.rs +++ b/libsql-server/src/namespace/configurator/helpers.rs @@ -84,9 +84,16 @@ pub(super) async fn make_primary_connection_maker( let bottomless_replicator = match primary_config.bottomless_replication { Some(ref options) => { - tracing::debug!("Checkpointing before initializing bottomless"); - crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; - tracing::debug!("Checkpointed before initializing bottomless"); + // TODO: figure out why we really need this the fixme above is not clear enough but + // disabling this allows us to prevent checkpointing of the wal file. + if !std::env::var("LIBSQL_DISABLE_INIT_CHECKPOINTING").is_ok() { + tracing::debug!("Checkpointing before initializing bottomless"); + crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; + tracing::debug!("Checkpointed before initializing bottomless"); + } else { + tracing::warn!("Disabling initial checkpoint before bottomless"); + } + let options = make_bottomless_options(options, bottomless_db_id, name.clone()); let (replicator, did_recover) = init_bottomless_replicator(db_path.join("data"), options, &restore_option).await?;