From c529f9fcc9aa9acd69ea9e65f307691094a139db Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Wed, 25 Feb 2026 11:37:26 +0100 Subject: [PATCH] Validate that the database connection on open is usable for reads and writes This handles the case where, for whatever reason, the database is actually locked externally and no transaction can be run against it. This enables the `database_externally_locked` test. --- glean-core/src/database/sqlite.rs | 70 +++++++++++++++++--- glean-core/src/database/sqlite/connection.rs | 8 +++ glean-core/src/database/sqlite/schema.rs | 7 ++ glean-core/src/error.rs | 17 +++-- glean-core/tests/sqlite.rs | 61 ++++++++++++----- 5 files changed, 129 insertions(+), 34 deletions(-) diff --git a/glean-core/src/database/sqlite.rs b/glean-core/src/database/sqlite.rs index 797f2e6114..259fd6aca4 100644 --- a/glean-core/src/database/sqlite.rs +++ b/glean-core/src/database/sqlite.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use std::fmt::{self, Display}; use std::fs; use std::num::NonZeroU64; use std::path::Path; @@ -23,7 +24,6 @@ use crate::common_metric_data::CommonMetricDataInternal; use crate::database::migration::{self, MigrationState}; use crate::metrics::dual_labeled_counter::RECORD_SEPARATOR; use crate::metrics::Metric; -use crate::Error; use crate::Glean; use crate::Lifetime; use crate::Result; @@ -34,7 +34,7 @@ mod schema; #[derive(Debug)] pub enum LoadState { Ok, - Err(Error), + Err(OpenError), } #[derive(Debug, PartialEq, Eq, Copy, Clone)] @@ -102,7 +102,54 @@ fn database_size(dir: &Path) -> Option { NonZeroU64::new(total_size) } -pub fn sqlite_open(path: &Path) -> std::result::Result<(Connection, LoadState), Error> { +#[derive(Debug)] +pub enum OpenError { + IncompatibleVersion(u32), + Corrupt, + SqlError(rusqlite::Error), + RecoveryError(std::io::Error), +} + +impl std::error::Error for OpenError {} + +impl Display for OpenError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use OpenError::*; + match self { + IncompatibleVersion(v) => write!(f, "Incompatible database version: {v}"), + Corrupt => write!(f, "Database is corrupt"), + SqlError(err) => write!(f, "Error executing SQL: {err}"), + RecoveryError(err) => write!( + f, + "Failed to recover a corrupt database due to an error deleting the file: {err}" + ), + } + } +} + +impl From for OpenError { + fn from(value: rusqlite::Error) -> Self { + match value { + rusqlite::Error::SqliteFailure(e, _) + if matches!(e.code, ErrorCode::DatabaseCorrupt | ErrorCode::NotADatabase) => + { + Self::Corrupt + } + _ => Self::SqlError(value), + } + } +} + +impl From for OpenError { + fn from(value: SchemaError) -> Self { + match value { + SchemaError::Sqlite(err) => OpenError::SqlError(err), + SchemaError::UnsupportedSchemaVersion(v) => OpenError::IncompatibleVersion(v), + } + } +} + +pub fn sqlite_open(path: &Path) -> std::result::Result<(Connection, LoadState), OpenError> { // TODO(bug 2049292): Make this more robust, use the correct errors and see how we can test all the branches // properly. match Connection::new::(path) { @@ -112,24 +159,24 @@ pub fn sqlite_open(path: &Path) -> std::result::Result<(Connection, LoadState), ErrorCode::PermissionDenied => Err(e.into()), ErrorCode::NotADatabase => { log::debug!("sqlite failed: not a database. starting from scratch."); - fs::remove_file(path).map_err(|_| rkv::StoreError::FileInvalid)?; + fs::remove_file(path).map_err(OpenError::RecoveryError)?; // Now try again, we only handle that error once. let conn = Connection::new::(path)?; - Ok((conn, LoadState::Err(e.into()))) + Ok((conn, LoadState::Err(OpenError::Corrupt))) } ErrorCode::CannotOpen => { log::debug!("sqlite failed: cannot open. starting from scratch."); - fs::remove_file(path).map_err(|_| rkv::StoreError::FileInvalid)?; + fs::remove_file(path).map_err(OpenError::RecoveryError)?; // Now try again, we only handle that error once. let conn = Connection::new::(path)?; - Ok((conn, LoadState::Err(e.into()))) + Ok((conn, LoadState::Err(OpenError::Corrupt))) } _ => Err(e.into()), } } Err(err @ SchemaError::Sqlite(SqlError::SqlInputError { .. })) => { log::debug!("sqlite failed: schema migration failed. starting from scratch."); - fs::remove_file(path).map_err(|_| rkv::StoreError::FileInvalid)?; + fs::remove_file(path).map_err(OpenError::RecoveryError)?; // Now try again, we only handle that error once. let conn = Connection::new::(path)?; Ok((conn, LoadState::Err(err.into()))) @@ -198,7 +245,12 @@ impl Database { /// Get the load state. pub fn load_state(&self) -> Option { if let LoadState::Err(e) = &self.load_state { - Some(e.to_string()) + Some(match e { + OpenError::IncompatibleVersion(v) => format!("incompatible version: {v}"), + OpenError::Corrupt => "database file corrupt".to_string(), + OpenError::SqlError(error) => format!("sql error: {error:?}"), + OpenError::RecoveryError(error) => format!("recovery error: {error:?}"), + }) } else { None } diff --git a/glean-core/src/database/sqlite/connection.rs b/glean-core/src/database/sqlite/connection.rs index 2e8aa54869..1bddefe5a1 100644 --- a/glean-core/src/database/sqlite/connection.rs +++ b/glean-core/src/database/sqlite/connection.rs @@ -33,6 +33,13 @@ pub trait ConnectionOpener { /// Upgrades an existing physical database to the schema with /// the given version. fn upgrade(tx: &mut Transaction<'_>, to_version: NonZeroU32) -> Result<(), Self::Error>; + + /// Validate that the database is usable. + /// + /// Called after `create` / `upgrade`. + fn validate(_tx: &mut Transaction<'_>) -> Result<(), Self::Error> { + Ok(()) + } } /// A thread-safe wrapper around a connection to a physical SQLite database. @@ -72,6 +79,7 @@ impl Connection { // so that upgrading to it again in the future can fix up any // invariants that our version might not uphold. tx.execute_batch(&format!("PRAGMA user_version = {}", O::MAX_SCHEMA_VERSION))?; + O::validate(&mut tx)?; tx.commit()?; Ok(Self::with_connection(conn)) } diff --git a/glean-core/src/database/sqlite/schema.rs b/glean-core/src/database/sqlite/schema.rs index 42e0ea8407..5be7df4fee 100644 --- a/glean-core/src/database/sqlite/schema.rs +++ b/glean-core/src/database/sqlite/schema.rs @@ -34,6 +34,7 @@ impl ConnectionOpener for Schema { PRAGMA temp_store = MEMORY; -- allows adding incremental cleanup later PRAGMA auto_vacuum = INCREMENTAL; + PRAGMA busy_timeout = 0; ", )?; @@ -65,6 +66,12 @@ impl ConnectionOpener for Schema { fn upgrade(_: &mut Transaction<'_>, to_version: NonZeroU32) -> Result<(), Self::Error> { Err(SchemaError::UnsupportedSchemaVersion(to_version.get())) } + + fn validate(tx: &mut Transaction<'_>) -> Result<(), Self::Error> { + // A query selecting every field, it doesn't need to return anything. + tx.execute_batch("SELECT id, ping, lifetime, labels, value FROM telemetry WHERE 1 = 0")?; + Ok(()) + } } #[derive(thiserror::Error, Debug)] diff --git a/glean-core/src/error.rs b/glean-core/src/error.rs index 58823a93d9..9f0579f743 100644 --- a/glean-core/src/error.rs +++ b/glean-core/src/error.rs @@ -9,7 +9,7 @@ use std::result; use rkv::StoreError; -use crate::database::sqlite::SchemaError; +use crate::database::sqlite::{OpenError, SchemaError}; /// A specialized [`Result`] type for this crate's operations. /// @@ -173,15 +173,18 @@ impl From for Error { } } -impl From for Error { - fn from(error: SchemaError) -> Error { +impl From for Error { + fn from(error: OpenError) -> Error { match error { - SchemaError::Sqlite(err) => Error { - kind: ErrorKind::SQLite(err), + OpenError::IncompatibleVersion(v) => Error { + kind: ErrorKind::Schema(SchemaError::UnsupportedSchemaVersion(v)), }, - err => Error { - kind: ErrorKind::Schema(err), + // TODO + OpenError::Corrupt => Error { + kind: ErrorKind::NotInitialized, }, + OpenError::SqlError(error) => error.into(), + OpenError::RecoveryError(error) => error.into(), } } } diff --git a/glean-core/tests/sqlite.rs b/glean-core/tests/sqlite.rs index 242e82ed3c..35a68534c7 100644 --- a/glean-core/tests/sqlite.rs +++ b/glean-core/tests/sqlite.rs @@ -9,7 +9,9 @@ use crate::common::*; use glean_core::metrics::*; use glean_core::CommonMetricData; +use glean_core::Glean; use glean_core::Lifetime; +use glean_core::SessionMode; use rusqlite::params; use rusqlite::TransactionBehavior; use uuid::uuid; @@ -52,6 +54,9 @@ fn database_file_is_not_sqlite() { let client_id = clientid_metric().get_value(&glean, None); assert!(client_id.is_some()); + + let load_error = load_error_metric().get_value(&glean, None).unwrap(); + assert_eq!("database file corrupt", load_error); } #[test] @@ -75,12 +80,11 @@ fn database_contains_wrong_table() { let client_id = clientid_metric().get_value(&glean, None); assert!(client_id.is_some()); - let load_error = load_error_metric().get_value(&glean, None); - assert!(load_error.is_some()); + let load_error = load_error_metric().get_value(&glean, None).unwrap(); + assert!(load_error.starts_with("sql error:")); } #[test] -#[ignore] fn database_contains_correct_user_version_but_wrong_table() { let temp = { let (glean, temp) = new_glean(None); @@ -100,8 +104,8 @@ fn database_contains_correct_user_version_but_wrong_table() { let client_id = clientid_metric().get_value(&glean, None); assert!(client_id.is_some()); - let load_error = load_error_metric().get_value(&glean, None); - assert!(load_error.is_some()); + let load_error = load_error_metric().get_value(&glean, None).unwrap(); + assert!(load_error.starts_with("sql error:")); } #[test] @@ -153,13 +157,14 @@ fn higher_user_version_upgrade_does_not_crash() { let client_id = clientid_metric().get_value(&glean, None).unwrap(); assert_eq!(first_client_id, client_id); + + let load_error = load_error_metric().get_value(&glean, None); + assert!(load_error.is_none()); } // Permissions only really work on Unix systems, definitely not on Windows #[cfg(unix)] mod unix { - use glean_core::Glean; - use super::*; #[test] @@ -205,14 +210,12 @@ mod unix { } } -// TODO(bug 2049295): -// This currently fails. -// The database is locked, so Glean can't access it. -// It's unclear how we should handle that. -// It's not a particular likely case to happen in practice. #[test] -#[ignore] fn database_externally_locked() { + // This is NOT the usual case. + // But if the database is already locked, there's little we can do. + // This behavior is the same as if we don't have permissions to access the database file. + let temp = { let (glean, temp) = new_glean(None); drop(glean); @@ -220,13 +223,35 @@ fn database_externally_locked() { }; let path = temp.path().join("db").join("glean.sqlite"); - let mut conn = rusqlite::Connection::open(path).unwrap(); + let mut conn = rusqlite::Connection::open(&path).unwrap(); let _tx = conn .transaction_with_behavior(TransactionBehavior::Immediate) .unwrap(); - let (glean, _temp) = new_glean(Some(temp)); - - let client_id = clientid_metric().get_value(&glean, None); - assert!(client_id.is_some()); + let cfg = glean_core::InternalConfiguration { + data_path: path.display().to_string(), + application_id: GLOBAL_APPLICATION_ID.into(), + language_binding_name: "Rust".into(), + upload_enabled: true, + max_events: None, + delay_ping_lifetime_io: false, + app_build: "Unknown".into(), + use_core_mps: false, + trim_data_to_registered_pings: false, + log_level: None, + rate_limit: None, + enable_event_timestamps: false, + experimentation_id: None, + enable_internal_pings: true, + ping_schedule: Default::default(), + ping_lifetime_threshold: 0, + ping_lifetime_max_time: 0, + max_pending_pings_count: None, + max_pending_pings_directory_size: None, + session_inactivity_timeout_ms: 0, + session_mode: SessionMode::Auto, + session_sample_rate: 1.0, + }; + let glean = Glean::new(cfg); + assert!(glean.is_err()); }