From c010babf99dbe5f550b4ea1fda4fb8faa3102276 Mon Sep 17 00:00:00 2001 From: Xuezhao Liu Date: Mon, 9 Feb 2026 23:25:33 +0000 Subject: [PATCH] DAOS-18487 placement: fix a bug in shard remapping chain When a shard is remapped a second time and the spare target is DOWN2UP, the fs_down2up flag must be set so that the shard's po_rebuilding flag can be set at the end. Example case - Target A was DOWN, its rebuild completed and its status changed to DOWNOUT. Target B was DOWN and its rebuild started, but before it completed the admin reintegrated it, so its status changed to UP with the DOWN2UP flag. In object layout calculation, a shard originally located on Target A is first remapped to Target B and then still needs a 2nd remap. The fs_down2up flag is not set by the 1st remap, so it must be set in this case; otherwise the shard's po_rebuilding flag cannot be set and reads may go to that shard (an invalid place). This bug could cause data corruption (most likely in the form of shard loss). Signed-off-by: Xuezhao Liu --- src/object/obj_layout.c | 4 ++-- src/placement/pl_map_common.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/object/obj_layout.c b/src/object/obj_layout.c index 87958b70a11..50136c40986 100644 --- a/src/object/obj_layout.c +++ b/src/object/obj_layout.c @@ -115,8 +115,8 @@ obj_dump_grp_layout(daos_handle_t oh, uint32_t shard) } grp_idx = shard / obj->cob_grp_size; - D_INFO(DF_OID " shard %d, grp_idx %d, grp_size %d", DP_OID(obj->cob_md.omd_id), shard, - grp_idx, obj->cob_grp_size); + D_INFO(DF_OID " shard %d, grp_idx %d, grp_size %d, map_ver %d", DP_OID(obj->cob_md.omd_id), + shard, grp_idx, obj->cob_grp_size, obj->cob_version); for (i = grp_idx * obj->cob_grp_size, nr = 0; nr < obj->cob_grp_size; i++, nr++) { obj_shard = &obj->cob_shards->do_shards[i]; D_INFO("shard %d/%d/%d, tgt_id %d, rank %d, tgt_idx %d, " diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c index 5afe0691a37..1ef403eecad 100644 --- a/src/placement/pl_map_common.c +++ 
b/src/placement/pl_map_common.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -378,6 +379,22 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md, f_shard->fs_fseq = spare_tgt->ta_comp.co_fseq; f_shard->fs_status = spare_tgt->ta_comp.co_status; + /* On a 2nd remap, if the spare target is DOWN2UP, the fs_down2up flag must be + * set so that the shard's po_rebuilding flag can be set at the end. + * + * Example case - + * Target A was DOWN, its rebuild completed and its status changed to DOWNOUT. + * Target B was DOWN and its rebuild started, but before it completed the admin + * reintegrated it, so its status changed to UP with the DOWN2UP flag. + * + * In object layout calculation, a shard originally located on Target A is first + * remapped to Target B and then still needs a 2nd remap. fs_down2up is not set by + * the 1st remap, so set it here; otherwise the shard's po_rebuilding flag cannot + * be set and reads may go to that shard (an invalid place). + */ + if (pool_target_is_down2up(spare_tgt)) + f_shard->fs_down2up = 1; + d_list_del_init(&f_shard->fs_list); remap_add_one(remap_list, f_shard);