diff --git a/src/placement/tests/place_obj_common.h b/src/placement/tests/place_obj_common.h index 537fbe963a2..34abd403b77 100644 --- a/src/placement/tests/place_obj_common.h +++ b/src/placement/tests/place_obj_common.h @@ -1,11 +1,9 @@ /** * (C) Copyright 2016-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ -#define D_LOGFAC DD_FAC(tests) - #ifndef __PL_MAP_COMMON_H__ #define __PL_MAP_COMMON_H__ diff --git a/src/placement/tests/placement_test.c b/src/placement/tests/placement_test.c index 1649fdd7fa1..c86d0e10ec9 100644 --- a/src/placement/tests/placement_test.c +++ b/src/placement/tests/placement_test.c @@ -1,9 +1,11 @@ /** * (C) Copyright 2021-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent * */ +#define D_LOGFAC DD_FAC(tests) #include #include "place_obj_common.h" diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index 04cdd4e41ce..77553fe2bd2 100644 --- a/src/utils/ddb/ddb_commands.c +++ b/src/utils/ddb/ddb_commands.c @@ -83,7 +83,7 @@ ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt) DDB_POOL_SHOULD_CLOSE(ctx); ctx->dc_write_mode = opt->write_mode; - return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0); + return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0, ctx->dc_write_mode); } int @@ -1101,7 +1101,8 @@ ddb_run_feature(struct ddb_ctx *ctx, struct feature_options *opt) if (!opt->db_path || strnlen(opt->db_path, PATH_MAX) == 0) opt->db_path = ctx->dc_db_path; - rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG); + rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG, + ctx->dc_write_mode); if (rc) return rc; close = true; diff --git a/src/utils/ddb/ddb_main.c b/src/utils/ddb/ddb_main.c index 
8b66a4b17d5..050ae9d9176 100644 --- a/src/utils/ddb/ddb_main.c +++ b/src/utils/ddb/ddb_main.c @@ -252,7 +252,8 @@ ddb_main(struct ddb_io_ft *io_ft, int argc, char *argv[]) if (!SUCCESS(rc)) D_GOTO(done, rc); if (open) { - rc = dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0); + rc = + dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0, ctx.dc_write_mode); if (!SUCCESS(rc)) D_GOTO(done, rc); } diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index 7d4409a36c2..4ee22fbe099 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -27,9 +28,11 @@ anchors, cb, NULL, args, NULL) int -dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags) +dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags, + bool write_mode) { struct vos_file_parts path_parts = {0}; + int cow_val; int rc; /* @@ -47,11 +50,34 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t strncpy(path_parts.vf_db_path, db_path, sizeof(path_parts.vf_db_path) - 1); } + /** + * When the user requests read‑only mode (write_mode == false), DDB itself will not attempt + * to modify the pool. However, PMEMOBJ performs several operations that do modify the pool + * during open and/or close, for example: + * - Internal bookkeeping required to ensure resilience in case of an ADR failure (SDS). + * - ULOG replay, which restores the pool to a consistent state. + * These mechanisms cannot be disabled because they are essential for PMEMOBJ to maintain + * the consistency of the pool. + * + * However, since none of these changes need to be persisted when the pool is opened in + * read‑only mode (write_mode == false), we can work around this by mapping the pool using + * copy‑on‑write. Copy‑on‑write allows pages to be read normally, but when a page is + * modified, a new private copy is allocated. 
As a result, any changes made to + * the mapped memory do not propagate to the persistent medium. + */ + if (!write_mode) { + cow_val = 1; + rc = pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val); + if (rc != 0) { + return daos_errno2der(errno); + } + } + rc = vos_self_init(path_parts.vf_db_path, true, path_parts.vf_target_idx); if (!SUCCESS(rc)) { D_ERROR("Failed to initialize VOS with path '%s': "DF_RC"\n", path_parts.vf_db_path, DP_RC(rc)); - return rc; + goto exit; } rc = vos_pool_open(path, path_parts.vf_pool_uuid, flags, poh); @@ -60,6 +86,16 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t vos_self_fini(); } +exit: + if (!write_mode) { + /** Restore the default value; report (but do not fail the open on) a restore error. */ + cow_val = 0; + if (pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val) != 0) { + D_ERROR("Failed to restore copy_on_write.at_open: " DF_RC "\n", + DP_RC(daos_errno2der(errno))); + } + } + return rc; } diff --git a/src/utils/ddb/ddb_vos.h b/src/utils/ddb/ddb_vos.h index 7d1da8900fc..675c4ceb33b 100644 --- a/src/utils/ddb/ddb_vos.h +++ b/src/utils/ddb/ddb_vos.h @@ -52,7 +52,8 @@ struct ddb_array { /* Open and close a pool for a ddb_ctx */ int - dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags); + dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags, + bool write_mode); int dv_pool_close(daos_handle_t poh); int dv_pool_destroy(const char *path, const char *db_path); diff --git a/src/utils/ddb/tests/ddb_commands_tests.c b/src/utils/ddb/tests/ddb_commands_tests.c index 35f6ff7299e..81bc9673dfe 100644 --- a/src/utils/ddb/tests/ddb_commands_tests.c +++ b/src/utils/ddb/tests/ddb_commands_tests.c @@ -583,7 +583,7 @@ dcv_suit_setup(void **state) /* test setup creates the pool, but doesn't open it ...
leave it open for these tests */ tctx = *state; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); g_ctx.dc_poh = tctx->dvt_poh; diff --git a/src/utils/ddb/tests/ddb_main_tests.c b/src/utils/ddb/tests/ddb_main_tests.c index 533f64fb422..35a100cf063 100644 --- a/src/utils/ddb/tests/ddb_main_tests.c +++ b/src/utils/ddb/tests/ddb_main_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -242,7 +243,7 @@ ddb_main_suit_setup(void **state) /* test setup creates the pool, but doesn't open it ... leave it open for these tests */ tctx = *state; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); return 0; } diff --git a/src/utils/ddb/tests/ddb_vos_tests.c b/src/utils/ddb/tests/ddb_vos_tests.c index 2812a775763..f5eb354642e 100644 --- a/src/utils/ddb/tests/ddb_vos_tests.c +++ b/src/utils/ddb/tests/ddb_vos_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -13,6 +14,8 @@ #include "ddb_cmocka.h" #include "ddb_test_driver.h" +#include "../../placement/tests/place_obj_common.h" + /* * The tests in this file depend on a VOS instance with a bunch of data written. The tests will * verify that different parts of the VOS tree can be navigated/iterated. 
The way the @@ -182,13 +185,13 @@ open_pool_test(void **state) daos_handle_t poh; struct dt_vos_pool_ctx *tctx = *state; - assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0)); + assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0, false)); - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false)); assert_success(dv_pool_close(poh)); /* should be able to open again after closing */ - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false)); assert_success(dv_pool_close(poh)); } @@ -1087,7 +1090,7 @@ dv_test_setup(void **state) active_entry_handler_called = 0; committed_entry_handler_called = 0; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); return 0; } @@ -1108,7 +1111,8 @@ pool_flags_tests(void **state) uint64_t compat_flags; uint64_t incompat_flags; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG)); + assert_success( + dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG, true)); assert_success(dv_pool_get_flags(poh, &compat_flags, &incompat_flags)); assert(compat_flags == 0); assert(incompat_flags == 0); @@ -1120,6 +1124,84 @@ pool_flags_tests(void **state) assert_success(dv_pool_close(poh)); } +#define SHA256_DIGEST_LEN 64 + +struct file_state { + struct stat stat; + char digest[SHA256_DIGEST_LEN]; +}; + +#define FILE_STATE_PRE 0 +#define FILE_STATE_POST 1 + +/** + * Use sha256sum utility to get the sha256 digest of the file. + * + * \note sha256sum was used to avoid introducing libcrypto dependency. 
+ */ +static void +sha256sum(const char *file, char digest[SHA256_DIGEST_LEN]) +{ + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "sha256sum \"%s\"", file); + + FILE *fp = popen(cmd, "r"); + assert_non_null(fp); + + /** sha256sum prints 64 hex chars first; %c (unlike %s) stores no NUL past digest[]. */ + assert_int_equal(fscanf(fp, "%" STR(SHA256_DIGEST_LEN) "c", digest), 1); + + pclose(fp); +} + +/** + * Simple sequence of operations: + * - stat + sha256sum + * - open + * - update a single value + * - close + * - stat + sha256sum + * + * \param[in] tctx Test context to get the pool name and access to the pool handle. + * \param[out] fs [0] state of the pool file at the beginning and [1] at the end. + * \param[in] write_mode Whether to open the pool in the write mode. + */ +static void +helper_stat_open_modify_close_stat(struct dt_vos_pool_ctx *tctx, struct file_state fs[2], + bool write_mode) +{ + const char *path = tctx->dvt_pmem_file; + + assert_int_equal(stat(path, &fs[FILE_STATE_PRE].stat), 0); + sha256sum(path, fs[FILE_STATE_PRE].digest); + + assert_success(dv_pool_open(path, NULL, &tctx->dvt_poh, 0, write_mode)); + update_value_to_modify_tests((void **)&tctx); + assert_success(dv_pool_close(tctx->dvt_poh)); + + assert_int_equal(stat(path, &fs[FILE_STATE_POST].stat), 0); + sha256sum(path, fs[FILE_STATE_POST].digest); +} + +static void +read_only_vs_write_mode_test(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct file_state fs[2]; + + /** In read‑only mode, the pool contents remain unchanged, and its mtime stays the same. */ + helper_stat_open_modify_close_stat(tctx, fs, false /** read-only */); + assert_int_equal(fs[FILE_STATE_PRE].stat.st_mtime, fs[FILE_STATE_POST].stat.st_mtime); + assert_memory_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_POST].digest, + SHA256_DIGEST_LEN); + + /** In write mode, the pool contents will change and its mtime will increase.
*/ + helper_stat_open_modify_close_stat(tctx, fs, true /** read-write */); + assert_true(fs[FILE_STATE_PRE].stat.st_mtime < fs[FILE_STATE_POST].stat.st_mtime); + assert_memory_not_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_POST].digest, + SHA256_DIGEST_LEN); +} + /* * All these tests use the same VOS tree that is created at suit_setup. Therefore, tests * that modify the state of the tree (delete, add, etc) should be run after all others. @@ -1149,6 +1231,8 @@ const struct CMUnitTest dv_test_cases[] = { TEST(dtx_abort_active_table), TEST(path_verify), {"pool_flag_update", pool_flags_tests, NULL, NULL}, + {"read_only_vs_write_mode", read_only_vs_write_mode_test, NULL, + NULL}, /* don't want this test to run with setup */ }; int diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 40927b22d28..f7a00dd0918 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -316,9 +316,7 @@ struct vos_pool { /** memory attribute of the @vp_umm */ struct umem_attr vp_uma; /** memory class instance of the pool */ - struct umem_instance vp_umm; - /** Size of pool file */ - uint64_t vp_size; + struct umem_instance vp_umm; /** Features enabled for this pool */ uint64_t vp_feats; /** btr handle for the container table */ diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 7d3f95142da..a6e06d6e026 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1162,16 +1162,6 @@ pool_hop_free(struct d_ulink *hlink) if (daos_handle_is_valid(pool->vp_cont_th)) dbtree_close(pool->vp_cont_th); - if (pool->vp_size != 0) { - rc = munlock((void *)pool->vp_umm.umm_base, pool->vp_size); - if (rc != 0) - D_WARN("Failed to unlock pool memory at "DF_X64": errno=%d (%s)\n", - pool->vp_umm.umm_base, errno, strerror(errno)); - else - D_DEBUG(DB_MGMT, "Unlocked VOS pool memory: "DF_U64" bytes at "DF_X64"\n", - pool->vp_size, pool->vp_umm.umm_base); - } - if (pool->vp_uma.uma_pool) vos_pmemobj_close(pool->vp_uma.uma_pool); @@ -1608,65 +1598,6 @@ vos_pool_destroy(const 
char *path, uuid_t uuid) return vos_pool_destroy_ex(path, uuid, 0); } -enum { - /** Memory locking flag not initialized */ - LM_FLAG_UNINIT, - /** Memory locking disabled */ - LM_FLAG_DISABLED, - /** Memory locking enabled */ - LM_FLAG_ENABLED -}; - -static void -lock_pool_memory(struct vos_pool *pool) -{ - static int lock_mem = LM_FLAG_UNINIT; - struct rlimit rlim; - size_t lock_bytes; - int rc; - - if (lock_mem == LM_FLAG_UNINIT) { - rc = getrlimit(RLIMIT_MEMLOCK, &rlim); - if (rc != 0) { - D_WARN("getrlimit() failed; errno=%d (%s)\n", errno, strerror(errno)); - lock_mem = LM_FLAG_DISABLED; - return; - } - - if (rlim.rlim_cur != RLIM_INFINITY || rlim.rlim_max != RLIM_INFINITY) { - D_WARN("Infinite rlimit not detected, not locking VOS pool memory\n"); - lock_mem = LM_FLAG_DISABLED; - return; - } - - lock_mem = LM_FLAG_ENABLED; - } - - if (lock_mem == LM_FLAG_DISABLED) - return; - - /* - * Mlock may take several tens of seconds to complete when memory - * is tight, so mlock is skipped in current MD-on-SSD scenario. - */ - if (bio_nvme_configured(SMD_DEV_TYPE_META)) - return; - - lock_bytes = pool->vp_pool_df->pd_scm_sz; - rc = mlock((void *)pool->vp_umm.umm_base, lock_bytes); - if (rc != 0) { - D_WARN("Could not lock memory for VOS pool "DF_U64" bytes at "DF_X64 - "; errno=%d (%s)\n", lock_bytes, pool->vp_umm.umm_base, - errno, strerror(errno)); - return; - } - - /* Only save the size if the locking was successful */ - pool->vp_size = lock_bytes; - D_DEBUG(DB_MGMT, "Locking VOS pool in memory "DF_U64" bytes at "DF_X64"\n", pool->vp_size, - pool->vp_umm.umm_base); -} - static int pool_open_prep(uuid_t uuid, unsigned int flags, struct vos_pool **p_pool) { @@ -1816,7 +1747,6 @@ pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned in vos_space_sys_init(pool); /* Ensure GC is triggered after server restart */ gc_add_pool(pool); - lock_pool_memory(pool); out: DL_CDEBUG(rc != 0, DLOG_ERR, DB_MGMT, rc,