Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/placement/tests/place_obj_common.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
#define D_LOGFAC DD_FAC(tests)

#ifndef __PL_MAP_COMMON_H__
#define __PL_MAP_COMMON_H__

Expand Down
2 changes: 2 additions & 0 deletions src/placement/tests/placement_test.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
/**
* (C) Copyright 2021-2023 Intel Corporation.
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*
*/
#define D_LOGFAC DD_FAC(tests)

#include <daos.h>
#include "place_obj_common.h"
Expand Down
5 changes: 3 additions & 2 deletions src/utils/ddb/ddb_commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt)
DDB_POOL_SHOULD_CLOSE(ctx);

ctx->dc_write_mode = opt->write_mode;
return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0);
return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0, ctx->dc_write_mode);
}

int
Expand Down Expand Up @@ -1101,7 +1101,8 @@ ddb_run_feature(struct ddb_ctx *ctx, struct feature_options *opt)
if (!opt->db_path || strnlen(opt->db_path, PATH_MAX) == 0)
opt->db_path = ctx->dc_db_path;

rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG);
rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG,
ctx->dc_write_mode);
if (rc)
return rc;
close = true;
Expand Down
3 changes: 2 additions & 1 deletion src/utils/ddb/ddb_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ ddb_main(struct ddb_io_ft *io_ft, int argc, char *argv[])
if (!SUCCESS(rc))
D_GOTO(done, rc);
if (open) {
rc = dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0);
rc =
dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0, ctx.dc_write_mode);
if (!SUCCESS(rc))
D_GOTO(done, rc);
}
Expand Down
37 changes: 35 additions & 2 deletions src/utils/ddb/ddb_vos.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string.h>
#include <sys/vfs.h>

#include <libpmemobj.h>
#include <daos_srv/vos.h>
#include <daos_srv/smd.h>
#include <vos_internal.h>
Expand All @@ -27,9 +28,11 @@
anchors, cb, NULL, args, NULL)

int
dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags)
dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags,
bool write_mode)
{
struct vos_file_parts path_parts = {0};
int cow_val;
int rc;

/*
Expand All @@ -47,11 +50,34 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t
strncpy(path_parts.vf_db_path, db_path, sizeof(path_parts.vf_db_path) - 1);
}

/**
* When the user requests read‑only mode (write_mode == false), DDB itself will not attempt
* to modify the pool. However, PMEMOBJ performs several operations that do modify the pool
* during open and/or close, for example:
* - Internal bookkeeping required to ensure resilience in case of an ADR failure (SDS).
* - ULOG replay, which restores the pool to a consistent state.
* These mechanisms cannot be disabled because they are essential for PMEMOBJ to maintain
* the consistency of the pool.
*
* However, since none of these changes need to be persisted when the pool is opened in
* read‑only mode (write_mode == false), we can work around this by mapping the pool using
* copy‑on‑write. Copy‑on‑write allows pages to be read normally, but when a page is
* modified, a new private copy is allocated. As a result, any changes made to
Copy link
Contributor

@Nasf-Fan Nasf-Fan Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If new private copy is allocated, then when open the vos pool again, the operation (read/write) will work against the original one or the new copy?

On the other hand, under non-interactive mode, many ddb sub-commands will open the vos pool implicitly as read-only, such as ddb ls. If it is repeated for a lot of times (for different items), will that cause some trouble for copy-on-write? Under extreme case, will ddb ls may fail because of ENOSPC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If new private copy is allocated, then when open the vos pool again, the operation (read/write) will work against the original one or the new copy?

This is a volatile copy of modified pages in RAM. These pages are freed once you close the pool / call munmap().

Please see mmap() and MAP_PRIVATE for details.

On the other hand, under non-interactive mode, many ddb sub-commands will open the vos pool implicitly as read-only, such as ddb ls. If it is repeated for a lot of times (for different items), will that cause some trouble for copy-on-write? Under extreme case, will ddb ls may fail because of ENOSPC?

If it is truly read-only I assume no changes will be made no matter the number of items being processed. So, I expect after startup, when a few copied pages will be allocated (for various PMEMOBJ purposes), the memory consumption in this department ought to be flat.

If, despite of the assumption of not making any changes to the pool, VOS actually makes writes to the pool when the user (DDB) requests merely to read the data, the memory consumption will continue to grow. The final signal would be ENOMEM.

If the latter is the case I would argue we should rethink are even able to provide a read-only mode.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure for this. It is better to make some test with you patch, such as repeatedly ddb ls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the latter is the case I would argue we should rethink are even able to provide a read-only mode.

I do not know how the implicit modification will be triggered when open the vos-pool and what is the potential side-effect. According to your description in DAOS-16964, both the vos file time stamp and some other metadata are changed.

Will that potentially damage the vos-pool? If it is harmful, does it means even if we did not run ddb, related risk still be there when DAOS engine normally opens the vos-pool? Otherwise, if it is harmless, can we ignore such implicit modification to avoid CoW trouble?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will that potentially damage the vos-pool?

Yes. It can. You can imagine e.g. the PMEMOBJ ulog is corrupted. If we have bad luck PMEMOBJ may apply these malicious changes and further mangle the VOS pool contents. Which may prevents us from finding the root cause of whatever problem we are trying to pinpoint.

does it means even if we did not run ddb, related risk still be there when DAOS engine normally opens the vos-pool?

Yes. At the moment opening the VOS pool always does the same things underneath. No matter whether the open is supervised by DDB or daos_engine.

can we ignore such implicit modification to avoid CoW trouble?

It looks like it is not harmless in general. But regardless I think CoW is worth the hassle because we need a read-only mode in my opinion. Otherwise, DDB is not really a debug tool since we cannot control what it does to the VOS file.


  1. I added a test which shows that CoW prevents changes from reaching the backing-file and in result also mtime stays the same. 😁
  2. I have done some measurements to check the memory consumption in both cases. TBH the results are odd. I wrote my observations in the ticket: https://daosio.atlassian.net/browse/DAOS-16964?focusedCommentId=153817 Regardless I think we should proceed with this patch and maybe create a new ticket to understand better why it is the case. Which would be beneficial in three ways:
  • we could potentially decrease the memory usage assuming some of these operations are not necessary for DDB to function.
  • if these operations are not optimal maybe there is a room for performance gains for both DDB and daos_engine
  • it is not good both of us are surprised by this result (I assume you did not know it is the case, did you? 😉 ) so whatever we find out it would improve our understanding of how VOS really works

* the mapped memory do not propagate to the persistent medium.
*/
if (!write_mode) {
cow_val = 1;
rc = pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val);
if (rc != 0) {
return daos_errno2der(errno);
}
}

rc = vos_self_init(path_parts.vf_db_path, true, path_parts.vf_target_idx);
if (!SUCCESS(rc)) {
D_ERROR("Failed to initialize VOS with path '%s': "DF_RC"\n",
path_parts.vf_db_path, DP_RC(rc));
return rc;
goto exit;
}

rc = vos_pool_open(path, path_parts.vf_pool_uuid, flags, poh);
Expand All @@ -60,6 +86,13 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t
vos_self_fini();
}

exit:
if (!write_mode) {
/** Restore the default value. */
cow_val = 0;
pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val);
}

return rc;
}

Expand Down
3 changes: 2 additions & 1 deletion src/utils/ddb/ddb_vos.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ struct ddb_array {

/* Open and close a pool for a ddb_ctx */
int
dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags);
dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags,
bool write_mode);
int dv_pool_close(daos_handle_t poh);
int
dv_pool_destroy(const char *path, const char *db_path);
Expand Down
2 changes: 1 addition & 1 deletion src/utils/ddb/tests/ddb_commands_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ dcv_suit_setup(void **state)

/* test setup creates the pool, but doesn't open it ... leave it open for these tests */
tctx = *state;
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0));
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true));

g_ctx.dc_poh = tctx->dvt_poh;

Expand Down
3 changes: 2 additions & 1 deletion src/utils/ddb/tests/ddb_main_tests.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2022-2024 Intel Corporation.
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Vdura Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
Expand Down Expand Up @@ -242,7 +243,7 @@ ddb_main_suit_setup(void **state)

/* test setup creates the pool, but doesn't open it ... leave it open for these tests */
tctx = *state;
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0));
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true));

return 0;
}
Expand Down
94 changes: 89 additions & 5 deletions src/utils/ddb/tests/ddb_vos_tests.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2022-2024 Intel Corporation.
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Vdura Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
Expand All @@ -13,6 +14,8 @@
#include "ddb_cmocka.h"
#include "ddb_test_driver.h"

#include "../../placement/tests/place_obj_common.h"

/*
* The tests in this file depend on a VOS instance with a bunch of data written. The tests will
* verify that different parts of the VOS tree can be navigated/iterated. The way the
Expand Down Expand Up @@ -182,13 +185,13 @@ open_pool_test(void **state)
daos_handle_t poh;
struct dt_vos_pool_ctx *tctx = *state;

assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0));
assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0, false));

assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0));
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false));
assert_success(dv_pool_close(poh));

/* should be able to open again after closing */
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0));
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false));
assert_success(dv_pool_close(poh));
}

Expand Down Expand Up @@ -1087,7 +1090,7 @@ dv_test_setup(void **state)

active_entry_handler_called = 0;
committed_entry_handler_called = 0;
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0));
assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true));
return 0;
}

Expand All @@ -1108,7 +1111,8 @@ pool_flags_tests(void **state)
uint64_t compat_flags;
uint64_t incompat_flags;

assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG));
assert_success(
dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG, true));
assert_success(dv_pool_get_flags(poh, &compat_flags, &incompat_flags));
assert(compat_flags == 0);
assert(incompat_flags == 0);
Expand All @@ -1120,6 +1124,84 @@ pool_flags_tests(void **state)
assert_success(dv_pool_close(poh));
}

#define SHA256_DIGEST_LEN 64

struct file_state {
struct stat stat;
char digest[SHA256_DIGEST_LEN];
};

#define FILE_STATE_PRE 0
#define FILE_STATE_POST 1

/**
* Use sha256sum utility to get the sha256 digest of the file.
*
* \note sha256sum was used to avoid introducing libcrypto dependency.
*/
static void
sha256sum(const char *file, char digest[SHA256_DIGEST_LEN])
{
char cmd[1024];
snprintf(cmd, sizeof(cmd), "sha256sum \"%s\"", file);

FILE *fp = popen(cmd, "r");
assert_non_null(fp);

/** sha256sum prints: <64 hex chars> <filename> */
assert_int_equal(fscanf(fp, "%" STR(SHA256_DIGEST_LEN) "s", digest), 1);

pclose(fp);
}

/**
* Simple sequence of operations:
* - stat + sha256sum
* - open
* - update a single value
* - close
* - stat + sha256sum
*
* \param[in] tctx Test context to get the pool name and access to the pool handle.
* \param[out] fs [0] state of the pool file at the beginning and [1] at the end.
* \param[in] write_mode Whether to open the pool in the write mode.
*/
static void
helper_stat_open_modify_close_stat(struct dt_vos_pool_ctx *tctx, struct file_state fs[2],
bool write_mode)
{
const char *path = tctx->dvt_pmem_file;

assert_int_equal(stat(path, &fs[FILE_STATE_PRE].stat), 0);
sha256sum(path, fs[FILE_STATE_PRE].digest);

assert_success(dv_pool_open(path, NULL, &tctx->dvt_poh, 0, write_mode));
update_value_to_modify_tests((void **)&tctx);
assert_success(dv_pool_close(tctx->dvt_poh));

assert_int_equal(stat(path, &fs[FILE_STATE_POST].stat), 0);
sha256sum(path, fs[FILE_STATE_POST].digest);
}

static void
read_only_vs_write_mode_test(void **state)
{
struct dt_vos_pool_ctx *tctx = *state;
struct file_state fs[2];

/** In read‑only mode, the pool contents remain unchanged, and its mtime stays the same. */
helper_stat_open_modify_close_stat(tctx, fs, false /** read-only */);
assert_int_equal(fs[FILE_STATE_PRE].stat.st_mtime, fs[FILE_STATE_POST].stat.st_mtime);
assert_memory_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_PRE].digest,
SHA256_DIGEST_LEN);

/** In write mode, the pool contents will change and its mtime will increase. */
helper_stat_open_modify_close_stat(tctx, fs, true /** read-write */);
assert_true(fs[FILE_STATE_PRE].stat.st_mtime < fs[FILE_STATE_POST].stat.st_mtime);
assert_memory_not_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_POST].digest,
SHA256_DIGEST_LEN);
}

/*
* All these tests use the same VOS tree that is created at suit_setup. Therefore, tests
* that modify the state of the tree (delete, add, etc) should be run after all others.
Expand Down Expand Up @@ -1149,6 +1231,8 @@ const struct CMUnitTest dv_test_cases[] = {
TEST(dtx_abort_active_table),
TEST(path_verify),
{"pool_flag_update", pool_flags_tests, NULL, NULL},
{"read_only_vs_write_mode", read_only_vs_write_mode_test, NULL,
NULL}, /* don't want this test to run with setup */
};

int
Expand Down
4 changes: 1 addition & 3 deletions src/vos/vos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,7 @@ struct vos_pool {
/** memory attribute of the @vp_umm */
struct umem_attr vp_uma;
/** memory class instance of the pool */
struct umem_instance vp_umm;
/** Size of pool file */
uint64_t vp_size;
struct umem_instance vp_umm;
/** Features enabled for this pool */
uint64_t vp_feats;
/** btr handle for the container table */
Expand Down
Loading
Loading