diff --git a/compute/src/main/java/org/zstack/compute/vm/VmNicManagerImpl.java b/compute/src/main/java/org/zstack/compute/vm/VmNicManagerImpl.java index 25c0b005d71..31b3e35d32a 100644 --- a/compute/src/main/java/org/zstack/compute/vm/VmNicManagerImpl.java +++ b/compute/src/main/java/org/zstack/compute/vm/VmNicManagerImpl.java @@ -58,6 +58,10 @@ public void afterAddIpAddress(String vmNicUUid, String usedIpUuid) { SQL.New(UsedIpVO.class).eq(UsedIpVO_.uuid, usedIpUuid).set(UsedIpVO_.vmNicUuid, vmNicUUid).update(); VmNicVO nic = Q.New(VmNicVO.class).eq(VmNicVO_.uuid, vmNicUUid).find(); + if (nic == null) { + logger.debug(String.format("VmNic[uuid:%s] not found, skip afterAddIpAddress", vmNicUUid)); + return; + } UsedIpVO temp = null; /* if there is ipv4 addresses, we put the first attached ipv4 address to VmNic.ip @@ -88,6 +92,10 @@ public void afterAddIpAddress(String vmNicUUid, String usedIpUuid) { @Override public void afterDelIpAddress(String vmNicUUid, String usedIpUuid) { VmNicVO nic = Q.New(VmNicVO.class).eq(VmNicVO_.uuid, vmNicUUid).find(); + if (nic == null) { + logger.debug(String.format("VmNic[uuid:%s] not found, skip afterDelIpAddress", vmNicUUid)); + return; + } if (nic.getUsedIpUuid() != null && !nic.getUsedIpUuid().equals(usedIpUuid)) { return; } diff --git a/conf/i18n/globalErrorCodeMapping/global-error-en_US.json b/conf/i18n/globalErrorCodeMapping/global-error-en_US.json index 32eb4c8f056..715e823d95e 100644 --- a/conf/i18n/globalErrorCodeMapping/global-error-en_US.json +++ b/conf/i18n/globalErrorCodeMapping/global-error-en_US.json @@ -3374,7 +3374,7 @@ "ORG_ZSTACK_NETWORK_HUAWEI_IMASTER_10019": "delete token of SDN controller [IP:%s] failed because %s", "ORG_ZSTACK_STORAGE_PRIMARY_BLOCK_10004": "Cannot execute volume mapping to host flow due to invalid volume ID.%s", "ORG_ZSTACK_NETWORK_SERVICE_PORTFORWARDING_10007": "port forwarding rule [uuid:%s] has not been attached to any virtual machine network interface, cannot detach", - "ORG_ZSTACK_MEVOCO_10088": "cannot take a snapshot for volumes[%s] when volume[uuid: %s] is not attached", + "ORG_ZSTACK_MEVOCO_10088": "cannot create snapshot for volume[uuid:%s] because it is not attached to any VM instance. Please attach the volume to a VM first. Affected volumes: %s", "ORG_ZSTACK_STORAGE_PRIMARY_BLOCK_10005": "Cannot execute map LUN to host flow due to invalid LUN type: %s", "ORG_ZSTACK_NETWORK_SERVICE_PORTFORWARDING_10008": "port forwarding rule [uuid:%s] has been associated with vm nic [uuid:%s], cannot be reassigned again", "ORG_ZSTACK_MEVOCO_10087": "A Running VM[uuid:%s] has no associated Host UUID.", diff --git a/conf/i18n/globalErrorCodeMapping/global-error-zh_CN.json b/conf/i18n/globalErrorCodeMapping/global-error-zh_CN.json index 84609838ddc..01960e8eb45 100644 --- a/conf/i18n/globalErrorCodeMapping/global-error-zh_CN.json +++ b/conf/i18n/globalErrorCodeMapping/global-error-zh_CN.json @@ -3374,7 +3374,7 @@ "ORG_ZSTACK_NETWORK_HUAWEI_IMASTER_10019": "删除 SDN 控制器 [IP:%s] 的令牌失败,因为 %s", "ORG_ZSTACK_STORAGE_PRIMARY_BLOCK_10004": "无法执行映射LUN到主机流程,无效的LUN ID", "ORG_ZSTACK_NETWORK_SERVICE_PORTFORWARDING_10007": "端口转发规则 rule[uuid:%s] 没有绑定到任何 VM 的网卡上,无法解除绑定", - "ORG_ZSTACK_MEVOCO_10088": "无法为挂载状态以外的卷[%s]创建快照", + "ORG_ZSTACK_MEVOCO_10088": "无法为云盘[uuid:%s]创建快照,因为该云盘未挂载到任何云主机。请先将云盘挂载到云主机后再创建快照。相关云盘: %s", "ORG_ZSTACK_STORAGE_PRIMARY_BLOCK_10005": "无法执行映射LUN到主机流程,无效的LUN类型", "ORG_ZSTACK_NETWORK_SERVICE_PORTFORWARDING_10008": "端口转发规则[uuid:%s]已绑定到VM网卡[uuid:%s],无法再次绑定", "ORG_ZSTACK_MEVOCO_10087": "如何一个运行中的VM[uuid:%s]没有宿主机uuid?", diff --git a/core/src/main/java/org/zstack/core/cloudbus/ResourceDestinationMakerImpl.java b/core/src/main/java/org/zstack/core/cloudbus/ResourceDestinationMakerImpl.java index 08a776f1db2..9ead578395d 100755 --- a/core/src/main/java/org/zstack/core/cloudbus/ResourceDestinationMakerImpl.java +++ b/core/src/main/java/org/zstack/core/cloudbus/ResourceDestinationMakerImpl.java @@ -27,27 +27,27 @@ public class ResourceDestinationMakerImpl implements ManagementNodeChangeListene private DatabaseFacade dbf; @Override - public void nodeJoin(ManagementNodeInventory inv) { + public synchronized void nodeJoin(ManagementNodeInventory inv) { nodeHash.add(inv.getUuid()); nodes.put(inv.getUuid(), new NodeInfo(inv)); } @Override - public void nodeLeft(ManagementNodeInventory inv) { + public synchronized void nodeLeft(ManagementNodeInventory inv) { String nodeId = inv.getUuid(); nodeHash.remove(nodeId); nodes.remove(nodeId); } @Override - public void iAmDead(ManagementNodeInventory inv) { + public synchronized void iAmDead(ManagementNodeInventory inv) { String nodeId = inv.getUuid(); nodeHash.remove(nodeId); nodes.remove(nodeId); } @Override - public void iJoin(ManagementNodeInventory inv) { + public synchronized void iJoin(ManagementNodeInventory inv) { List lst = Q.New(ManagementNodeVO.class).list(); lst.forEach((ManagementNodeVO node) -> { nodeHash.add(node.getUuid()); @@ -56,7 +56,7 @@ public void iJoin(ManagementNodeInventory inv) { } @Override - public String makeDestination(String resourceUuid) { + public synchronized String makeDestination(String resourceUuid) { String nodeUuid = nodeHash.get(resourceUuid); if (nodeUuid == null) { throw new CloudRuntimeException("Cannot find any available management node to send message"); @@ -66,18 +66,18 @@ public String makeDestination(String resourceUuid) { } @Override - public boolean isManagedByUs(String resourceUuid) { + public synchronized boolean isManagedByUs(String resourceUuid) { String nodeUuid = makeDestination(resourceUuid); return nodeUuid.equals(Platform.getManagementServerId()); } @Override - public Collection getManagementNodesInHashRing() { - return nodeHash.getNodes(); + public synchronized Collection getManagementNodesInHashRing() { + return new ArrayList<>(nodeHash.getNodes()); } @Override - public NodeInfo getNodeInfo(String nodeUuid) { + public synchronized NodeInfo getNodeInfo(String nodeUuid) { NodeInfo info = nodes.get(nodeUuid); if (info == null) { ManagementNodeVO vo = dbf.findByUuid(nodeUuid, ManagementNodeVO.class); @@ -93,17 +93,17 @@ public NodeInfo getNodeInfo(String nodeUuid) { } @Override - public Collection getAllNodeInfo() { - return nodes.values(); + public synchronized Collection getAllNodeInfo() { + return new ArrayList<>(nodes.values()); } @Override - public int getManagementNodeCount() { - return nodes.values().size(); + public synchronized int getManagementNodeCount() { + return nodes.size(); } - public boolean isNodeInCircle(String nodeId) { + public synchronized boolean isNodeInCircle(String nodeId) { return nodeHash.hasNode(nodeId); } } diff --git a/core/src/main/java/org/zstack/core/thread/DispatchQueueImpl.java b/core/src/main/java/org/zstack/core/thread/DispatchQueueImpl.java index e298cdd787f..961b192ac7c 100755 --- a/core/src/main/java/org/zstack/core/thread/DispatchQueueImpl.java +++ b/core/src/main/java/org/zstack/core/thread/DispatchQueueImpl.java @@ -302,7 +302,7 @@ private class SyncTaskFuture extends AbstractFuture { public SyncTaskFuture(SyncTask task) { super(task); - this.parentContext = Context.current(); + this.parentContext = isTelemetryEnabled() ? Context.current() : null; } private SyncTask getTask() { diff --git a/header/src/main/java/org/zstack/header/storage/addon/primary/ExternalPrimaryStorageInventory.java b/header/src/main/java/org/zstack/header/storage/addon/primary/ExternalPrimaryStorageInventory.java index 7808c227623..a15ed211307 100644 --- a/header/src/main/java/org/zstack/header/storage/addon/primary/ExternalPrimaryStorageInventory.java +++ b/header/src/main/java/org/zstack/header/storage/addon/primary/ExternalPrimaryStorageInventory.java @@ -4,8 +4,10 @@ import org.zstack.header.storage.primary.PrimaryStorageInventory; import org.zstack.utils.gson.JSONObjectUtil; +import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; @Inventory(mappingVOClass = ExternalPrimaryStorageVO.class) @@ -59,6 +61,7 @@ public ExternalPrimaryStorageInventory(ExternalPrimaryStorageVO lvo) { super(lvo); identity = lvo.getIdentity(); config = JSONObjectUtil.toObject(lvo.getConfig(), LinkedHashMap.class); + desensitizeConfig(config); addonInfo = JSONObjectUtil.toObject(lvo.getAddonInfo(), LinkedHashMap.class); outputProtocols = lvo.getOutputProtocols().stream().map(PrimaryStorageOutputProtocolRefVO::getOutputProtocol).collect(Collectors.toList()); defaultProtocol = lvo.getDefaultProtocol(); @@ -68,6 +71,35 @@ public static ExternalPrimaryStorageInventory valueOf(ExternalPrimaryStorageVO l return new ExternalPrimaryStorageInventory(lvo); } + private static void desensitizeConfig(Map config) { + if (config == null) return; + desensitizeUrlList(config, "mdsUrls"); + desensitizeUrlList(config, "mdsInfos"); + } + + private static void desensitizeUrlList(Map config, String key) { + Object urls = config.get(key); + if (urls instanceof List) { + List desensitized = new ArrayList<>(); + for (Object url : (List) urls) { + desensitized.add(desensitizeUrl(String.valueOf(url))); + } + config.put(key, desensitized); + } + } + + private static String desensitizeUrl(String url) { + int atIndex = url.lastIndexOf('@'); + if (atIndex > 0) { + int schemeIndex = url.indexOf("://"); + if (schemeIndex >= 0 && schemeIndex < atIndex) { + return url.substring(0, schemeIndex + 3) + "***" + url.substring(atIndex); + } + return "***" + url.substring(atIndex); + } + return url; + } + public String getIdentity() { return identity; } diff --git a/header/src/main/java/org/zstack/header/vm/VmInstanceSpec.java b/header/src/main/java/org/zstack/header/vm/VmInstanceSpec.java index 7007c592aea..99ee2173b98 100755 --- a/header/src/main/java/org/zstack/header/vm/VmInstanceSpec.java +++ b/header/src/main/java/org/zstack/header/vm/VmInstanceSpec.java @@ -847,7 +847,9 @@ public void setBootMode(String bootMode) { public long getRootDiskAllocateSize() { if (rootDiskOffering == null) { - return this.getImageSpec().getInventory().getSize(); + long virtualSize = this.getImageSpec().getInventory().getSize(); + long actualSize = this.getImageSpec().getInventory().getActualSize(); + return Math.max(virtualSize, actualSize); } return rootDiskOffering.getDiskSize(); } diff --git a/header/src/main/java/org/zstack/header/vm/VmInstanceState.java b/header/src/main/java/org/zstack/header/vm/VmInstanceState.java index 8a755b52fda..49303e23252 100755 --- a/header/src/main/java/org/zstack/header/vm/VmInstanceState.java +++ b/header/src/main/java/org/zstack/header/vm/VmInstanceState.java @@ -168,6 +168,7 @@ public enum VmInstanceState { new Transaction(VmInstanceStateEvent.destroyed, VmInstanceState.Destroyed), new Transaction(VmInstanceStateEvent.destroying, VmInstanceState.Destroying), new Transaction(VmInstanceStateEvent.running, VmInstanceState.Running), + new Transaction(VmInstanceStateEvent.stopped, VmInstanceState.Stopped), new Transaction(VmInstanceStateEvent.expunging, VmInstanceState.Expunging) ); Destroyed.transactions( diff --git a/network/src/main/java/org/zstack/network/l3/L3BasicNetwork.java b/network/src/main/java/org/zstack/network/l3/L3BasicNetwork.java index 5536a5fc487..b1b0b92d497 100755 --- a/network/src/main/java/org/zstack/network/l3/L3BasicNetwork.java +++ b/network/src/main/java/org/zstack/network/l3/L3BasicNetwork.java @@ -1075,6 +1075,13 @@ private void handle(APIGetFreeIpMsg msg) { } limit -= freeIpInventorys.size(); } + + Set reservedIpRanges = self.getReservedIpRanges(); + if (reservedIpRanges != null && !reservedIpRanges.isEmpty()) { + freeIpInventorys.removeIf(freeIp -> reservedIpRanges.stream().anyMatch( + r -> NetworkUtils.isInRange(freeIp.getIp(), r.getStartIp(), r.getEndIp()))); + } + reply.setInventories(freeIpInventorys); bus.reply(msg, reply); diff --git a/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageBase.java b/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageBase.java index d80b40a1d6a..8b387306683 100755 --- a/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageBase.java +++ b/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageBase.java @@ -5446,7 +5446,7 @@ private void deleteSnapshotOnPrimaryStorage(final DeleteSnapshotOnPrimaryStorage httpCall(DELETE_SNAPSHOT_PATH, cmd, DeleteSnapshotRsp.class, new ReturnValueCompletion(msg) { @Override public void success(DeleteSnapshotRsp returnValue) { - osdHelper.releaseAvailableCapacity(msg.getSnapshot().getPrimaryStorageInstallPath(), msg.getSnapshot().getSize()); + osdHelper.releaseAvailableCapWithRatio(msg.getSnapshot().getPrimaryStorageInstallPath(), msg.getSnapshot().getSize()); bus.reply(msg, reply); completion.done(); } diff --git a/plugin/loadBalancer/src/main/java/org/zstack/network/service/lb/LoadBalancerApiInterceptor.java b/plugin/loadBalancer/src/main/java/org/zstack/network/service/lb/LoadBalancerApiInterceptor.java index 97b88c919c2..0d9946d5320 100755 --- a/plugin/loadBalancer/src/main/java/org/zstack/network/service/lb/LoadBalancerApiInterceptor.java +++ b/plugin/loadBalancer/src/main/java/org/zstack/network/service/lb/LoadBalancerApiInterceptor.java @@ -39,6 +39,7 @@ import org.zstack.network.service.vip.VipVO_; import org.zstack.tag.PatternedSystemTag; import org.zstack.tag.TagManager; +import org.zstack.core.upgrade.UpgradeGlobalConfig; import org.zstack.utils.*; import org.zstack.utils.function.ForEachFunction; import org.zstack.utils.logging.CLogger; @@ -152,10 +153,22 @@ public APIMessage intercept(APIMessage msg) throws ApiMessageInterceptionExcepti validate((APIGetCandidateVmNicsForLoadBalancerServerGroupMsg)msg); } else if (msg instanceof APIChangeLoadBalancerBackendServerMsg) { validate((APIChangeLoadBalancerBackendServerMsg)msg); + } else if (msg instanceof APIDeleteLoadBalancerMsg) { + validate((APIDeleteLoadBalancerMsg) msg); } return msg; } + private void validate(APIDeleteLoadBalancerMsg msg) { + if (UpgradeGlobalConfig.GRAYSCALE_UPGRADE.value(Boolean.class)) { + LoadBalancerVO lb = dbf.findByUuid(msg.getUuid(), LoadBalancerVO.class); + if (lb != null && lb.getType() == LoadBalancerType.SLB) { + throw new ApiMessageInterceptionException(argerr( + "cannot delete the standalone load balancer[uuid:%s] during grayscale upgrade", msg.getUuid())); + } + } + } + private void validate(APIDeleteAccessControlListMsg msg) { /*List refs = Q.New(LoadBalancerListenerACLRefVO.class).select(LoadBalancerListenerACLRefVO_.listenerUuid) .eq(LoadBalancerListenerACLRefVO_.aclUuid, msg.getUuid()).isNull(LoadBalancerListenerACLRefVO_.serverGroupUuid).listValues(); diff --git a/plugin/securityGroup/src/main/java/org/zstack/network/securitygroup/SecurityGroupApiInterceptor.java b/plugin/securityGroup/src/main/java/org/zstack/network/securitygroup/SecurityGroupApiInterceptor.java index 135ad3886aa..bd994e6d069 100755 --- a/plugin/securityGroup/src/main/java/org/zstack/network/securitygroup/SecurityGroupApiInterceptor.java +++ b/plugin/securityGroup/src/main/java/org/zstack/network/securitygroup/SecurityGroupApiInterceptor.java @@ -343,13 +343,8 @@ private void validate(APISetVmNicSecurityGroupMsg msg) { if (!aoMap.isEmpty()) { Integer[] priorities = aoMap.keySet().toArray(new Integer[aoMap.size()]); Arrays.sort(priorities); - if (priorities[0] != 1) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10022, "could no set vm nic security group, because invalid priority, priority expects to start at 1, but [%d]", priorities[0])); - } - for (int i = 0; i < priorities.length - 1; i++) { - if (priorities[i] + 1 != priorities[i + 1]) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10023, "could no set vm nic security group, because invalid priority, priority[%d] and priority[%d] expected to be consecutive", priorities[i], priorities[i + 1])); - } + if (priorities[0] < 1) { + throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10022, "could no set vm nic security group, because invalid priority, priority expects to be positive, but [%d]", priorities[0])); } } @@ -390,13 +385,8 @@ private void validate(APISetVmNicSecurityGroupMsg msg) { if (!adminIntegers.isEmpty()) { Integer[] priorities = adminIntegers.toArray(new Integer[adminIntegers.size()]); Arrays.sort(priorities); - if (priorities[0] != 1) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10024, "could no set vm nic security group, because admin security group priority[%d] must be higher than users", priorities[0])); - } - for (int i = 0; i < priorities.length - 1; i++) { - if (priorities[i] + 1 != priorities[i + 1]) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10025, "could no set vm nic security group, because admin security group priority[%d] must be higher than users", priorities[i + 1])); - } + if (priorities[0] < 1) { + throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10024, "could no set vm nic security group, because admin security group priority[%d] must be positive", priorities[0])); } } } @@ -498,8 +488,9 @@ private void validate(APIUpdateSecurityGroupRulePriorityMsg msg) { rvos.stream().filter(rvo -> rvo.getUuid().equals(ao.getRuleUuid())).findFirst().orElseThrow(() -> new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10041, "could not update security group rule priority, because rule[uuid:%s] not in security group[uuid:%s]", ao.getRuleUuid(), msg.getSecurityGroupUuid()))); - rvos.stream().filter(rvo -> rvo.getPriority() == ao.getPriority()).findFirst().orElseThrow(() -> - new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10042, "could not update security group rule priority, because priority[%d] not in security group[uuid:%s]", ao.getPriority(), msg.getSecurityGroupUuid()))); + if (ao.getPriority() < 1) { + throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10042, "could not update security group rule priority, because priority[%d] must be positive", ao.getPriority())); + } } List uuidList = new ArrayList<>(priorityMap.values()); @@ -534,8 +525,8 @@ private void validate(APIChangeSecurityGroupRuleMsg msg) { if (count.intValue() > SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class)) { throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10047, "could not change security group rule, because security group %s rules number[%d] is out of max limit[%d]", vo.getType(), count.intValue(), SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class))); } - if (msg.getPriority() > count.intValue()) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10048, "could not change security group rule, because the maximum priority of %s rule is [%d]", vo.getType().toString(), count.intValue())); + if (msg.getPriority() > SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class)) { + throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10048, "could not change security group rule, because priority[%d] exceeds the maximum limit[%d]", msg.getPriority(), SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class))); } if (msg.getPriority() < 0) { msg.setPriority(SecurityGroupConstant.LOWEST_RULE_PRIORITY); @@ -1198,11 +1189,8 @@ private void validate(APIAddSecurityGroupRuleMsg msg) { throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10119, "could not add security group rule, because security group %s rules number[%d] is out of max limit[%d]", SecurityGroupRuleType.Egress, (egressRuleCount + toCreateEgressRuleCount), SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class))); } - if (msg.getPriority() > (ingressRuleCount + 1) && toCreateIngressRuleCount > 0) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10120, "could not add security group rule, because priority[%d] must be consecutive, the ingress rule maximum priority is [%d]", msg.getPriority(), ingressRuleCount)); - } - if (msg.getPriority() > (egressRuleCount + 1) && toCreateEgressRuleCount > 0) { - throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10121, "could not add security group rule, because priority[%d] must be consecutive, the egress rule maximum priority is [%d]", msg.getPriority(), egressRuleCount)); + if (msg.getPriority() > SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class)) { + throw new ApiMessageInterceptionException(argerr(ORG_ZSTACK_NETWORK_SECURITYGROUP_10120, "could not add security group rule, because priority[%d] exceeds the maximum limit[%d]", msg.getPriority(), SecurityGroupGlobalConfig.SECURITY_GROUP_RULES_NUM_LIMIT.value(Integer.class))); } } diff --git a/plugin/zbs/src/main/java/org/zstack/storage/zbs/ZbsStorageController.java b/plugin/zbs/src/main/java/org/zstack/storage/zbs/ZbsStorageController.java index db06239acb3..276ab367ba1 100644 --- a/plugin/zbs/src/main/java/org/zstack/storage/zbs/ZbsStorageController.java +++ b/plugin/zbs/src/main/java/org/zstack/storage/zbs/ZbsStorageController.java @@ -179,7 +179,10 @@ public List getActiveClients(String installPath, String prot if (VolumeProtocol.CBD.toString().equals(protocol)) { GetVolumeClientsCmd cmd = new GetVolumeClientsCmd(); cmd.setPath(installPath); - GetVolumeClientsRsp rsp = syncHttpCall(GET_VOLUME_CLIENTS_PATH, cmd, GetVolumeClientsRsp.class); + GetVolumeClientsRsp rsp = new HttpCaller<>(GET_VOLUME_CLIENTS_PATH, cmd, GetVolumeClientsRsp.class, + null, TimeUnit.SECONDS, 30, true) + .setTryNext(true) + .syncCall(); List clients = new ArrayList<>(); if (!rsp.isSuccess()) { @@ -1411,6 +1414,11 @@ public class HttpCaller { private boolean tryNext = false; + HttpCaller setTryNext(boolean tryNext) { + this.tryNext = tryNext; + return this; + } + public HttpCaller(String path, AgentCommand cmd, Class retClass, ReturnValueCompletion callback) { this(path, cmd, retClass, callback, null, 0, false); } diff --git a/portal/src/main/java/org/zstack/portal/managementnode/ManagementNodeManagerImpl.java b/portal/src/main/java/org/zstack/portal/managementnode/ManagementNodeManagerImpl.java index a945ab77274..4ece718ff52 100755 --- a/portal/src/main/java/org/zstack/portal/managementnode/ManagementNodeManagerImpl.java +++ b/portal/src/main/java/org/zstack/portal/managementnode/ManagementNodeManagerImpl.java @@ -74,6 +74,7 @@ import java.sql.SQLException; import java.sql.Timestamp; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -107,6 +108,15 @@ public class ManagementNodeManagerImpl extends AbstractService implements Manage // A dictionary (nodeId -> ManagementNodeInventory) of joined management Node final private Map joinedManagementNodes = new ConcurrentHashMap<>(); + // Lock to serialize lifecycle events from heartbeat reconciliation and canonical event callbacks, + // preventing race conditions where a nodeJoin event is immediately followed by a stale nodeLeft + // from the heartbeat thread, or vice versa. See ZSTAC-77711. + private final Object lifecycleLock = new Object(); + + // Track nodes found in hash ring but missing from DB. Only call nodeLeft after a node + // is missing for two consecutive heartbeat cycles, to avoid removing nodes that just joined. + private final Set suspectedMissingFromDb = new HashSet<>(); + private static int NODE_STARTING = 0; private static int NODE_RUNNING = 1; private static int NODE_FAILED = -1; @@ -368,12 +378,16 @@ protected void run(Map tokens, Object data) { ManagementNodeLifeCycleData d = (ManagementNodeLifeCycleData) data; - if (LifeCycle.NodeJoin.toString().equals(d.getLifeCycle())) { - nodeLifeCycle.nodeJoin(d.getInventory()); - } else if (LifeCycle.NodeLeft.toString().equals(d.getLifeCycle())) { - nodeLifeCycle.nodeLeft(d.getInventory()); - } else { - throw new CloudRuntimeException(String.format("unknown lifecycle[%s]", d.getLifeCycle())); + synchronized (lifecycleLock) { + if (LifeCycle.NodeJoin.toString().equals(d.getLifeCycle())) { + // Clear from suspected set since the node is confirmed alive + suspectedMissingFromDb.remove(d.getInventory().getUuid()); + nodeLifeCycle.nodeJoin(d.getInventory()); + } else if (LifeCycle.NodeLeft.toString().equals(d.getLifeCycle())) { + nodeLifeCycle.nodeLeft(d.getInventory()); + } else { + throw new CloudRuntimeException(String.format("unknown lifecycle[%s]", d.getLifeCycle())); + } } } }; @@ -860,34 +874,55 @@ private void checkAllNodesHealth() { Set nodeUuidsInDb = nodesInDb.stream().map(ManagementNodeVO::getUuid).collect(Collectors.toSet()); - // When a node is dying, we may not receive the the dead notification because the message bus may be also dead - // at that moment. By checking if the node UUID is still in our hash ring, we know what nodes should be kicked out - destinationMaker.getManagementNodesInHashRing().forEach(nodeUuid -> { - if (!nodeUuidsInDb.contains(nodeUuid)) { - logger.warn(String.format("found that a management node[uuid:%s] had no heartbeat in database but still in our hash ring," + - "notify that it's dead", nodeUuid)); - ManagementNodeInventory inv = new ManagementNodeInventory(); - inv.setUuid(nodeUuid); - inv.setHostName(destinationMaker.getNodeInfo(nodeUuid).getNodeIP()); - - nodeLifeCycle.nodeLeft(inv); - } - }); - - // check if any node missing in our hash ring - nodesInDb.forEach(n -> { - if (n.getUuid().equals(node().getUuid()) || suspects.contains(n)) { - return; - } - - new Runnable() { - @Override - @AsyncThread - public void run() { - nodeLifeCycle.nodeJoin(ManagementNodeInventory.valueOf(n)); + // Reconcile hash ring with DB under lifecycleLock to prevent race with + // canonical event callbacks (nodeJoin/nodeLeft). See ZSTAC-77711. + synchronized (lifecycleLock) { + // When a node is dying, we may not receive the dead notification because the message bus may be also dead + // at that moment. By checking if the node UUID is still in our hash ring, we know what nodes should be kicked out. + // Use two-round confirmation: first round marks as suspected, second round actually removes. + Set currentSuspected = new HashSet<>(); + destinationMaker.getManagementNodesInHashRing().forEach(nodeUuid -> { + if (!nodeUuidsInDb.contains(nodeUuid)) { + if (suspectedMissingFromDb.contains(nodeUuid)) { + // Second consecutive detection — confirmed missing, remove from hash ring + logger.warn(String.format("management node[uuid:%s] confirmed missing from database for two consecutive" + + " heartbeat cycles, removing from hash ring", nodeUuid)); + ManagementNodeInventory inv = new ManagementNodeInventory(); + inv.setUuid(nodeUuid); + try { + inv.setHostName(destinationMaker.getNodeInfo(nodeUuid).getNodeIP()); + } catch (Exception e) { + logger.warn(String.format("cannot get node info for node[uuid:%s], use empty hostname", nodeUuid)); + } + + nodeLifeCycle.nodeLeft(inv); + } else { + // First detection — mark as suspected, defer removal to next cycle + logger.warn(String.format("management node[uuid:%s] not found in database but still in hash ring," + + " marking as suspected (will remove on next heartbeat if still missing)", nodeUuid)); + currentSuspected.add(nodeUuid); + } } - }.run(); - }); + }); + // Update suspected set: only keep nodes that are newly suspected this round + suspectedMissingFromDb.clear(); + suspectedMissingFromDb.addAll(currentSuspected); + + // check if any node missing in our hash ring + nodesInDb.forEach(n -> { + if (n.getUuid().equals(node().getUuid()) || suspects.contains(n)) { + return; + } + + new Runnable() { + @Override + @AsyncThread + public void run() { + nodeLifeCycle.nodeJoin(ManagementNodeInventory.valueOf(n)); + } + }.run(); + }); + } } @Override diff --git a/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/AddSecurityGroupRuleOptimizedCase.groovy b/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/AddSecurityGroupRuleOptimizedCase.groovy index ed9a2736c5f..15f9c2c3d66 100644 --- a/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/AddSecurityGroupRuleOptimizedCase.groovy +++ b/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/AddSecurityGroupRuleOptimizedCase.groovy @@ -531,16 +531,17 @@ class AddSecurityGroupRuleOptimizedCase extends SubCase { } } - SecurityGroupRuleAO rule_82 = new SecurityGroupRuleAO() - rule_82.type = "Ingress" - rule_82.protocol = "TCP" - rule_82.dstPortRange = 82 + // ZSTAC-79067: non-consecutive priorities are now allowed, verify global limit instead + SecurityGroupRuleAO rule_over_limit = new SecurityGroupRuleAO() + rule_over_limit.type = "Ingress" + rule_over_limit.protocol = "TCP" + rule_over_limit.dstPortRange = 82 expect(AssertionError) { addSecurityGroupRule { securityGroupUuid = sg3.uuid - rules = [rule_82] - priority = 82 + rules = [rule_over_limit] + priority = 101 } } } @@ -591,13 +592,13 @@ class AddSecurityGroupRuleOptimizedCase extends SubCase { rule_13.protocol = "ALL" rule_13.startPort = -1 rule_13.endPort = -1 - expect(AssertionError) { - addSecurityGroupRule { - securityGroupUuid = sg3.uuid - rules = [rule_13] - priority = 13 - } + // ZSTAC-79067: non-consecutive priorities are now allowed + sg3 = addSecurityGroupRule { + securityGroupUuid = sg3.uuid + rules = [rule_13] + priority = 13 } + assert sg3.rules.find { it.priority == 13 } != null SecurityGroupRuleAO rule_12 = new SecurityGroupRuleAO() rule_12.dstIpRange = "2.2.2.2-2.2.2.10" @@ -609,12 +610,11 @@ class AddSecurityGroupRuleOptimizedCase extends SubCase { ingressRule.protocol = "TCP" ingressRule.dstPortRange = "12-13" - expect(AssertionError) { - addSecurityGroupRule { - securityGroupUuid = sg3.uuid - rules = [rule_12, ingressRule] - priority = 12 - } + // ZSTAC-79067: mixed ingress+egress add at non-consecutive priority is now allowed + sg3 = addSecurityGroupRule { + securityGroupUuid = sg3.uuid + rules = [rule_12, ingressRule] + priority = 12 } } diff --git a/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/ChangeSecurityGroupRuleCase.groovy b/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/ChangeSecurityGroupRuleCase.groovy index ad29787ef1e..60b59424257 100644 --- a/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/ChangeSecurityGroupRuleCase.groovy +++ b/test/src/test/groovy/org/zstack/test/integration/networkservice/provider/flat/securitygroup/ChangeSecurityGroupRuleCase.groovy @@ -231,19 +231,8 @@ class ChangeSecurityGroupRuleCase extends SubCase { assert sg3 != null SecurityGroupRuleInventory rule_1 = sg3.rules.find { it.type == "Ingress" && it.priority == 1 && it.ipVersion == 4 } - expect(AssertionError) { - changeSecurityGroupRule { - uuid = rule_1.uuid - priority = 6 - } - } - - expect(AssertionError) { - changeSecurityGroupRule { - uuid = rule_1.uuid - priority = 7 - } - } + // ZSTAC-79067: priorities beyond current rule count are now allowed + // (removed consecutive priority restriction) } void testChangeRuleDuplicate() { @@ -307,12 +296,8 @@ class ChangeSecurityGroupRuleCase extends SubCase { } } - expect(AssertionError) { - changeSecurityGroupRule { - uuid = rule1.uuid - priority = 3 - } - } + // ZSTAC-79067: priority beyond rule count is now allowed + // (removed consecutive restriction, only check against global limit) expect(AssertionError) { changeSecurityGroupRule { diff --git a/utils/src/main/java/org/zstack/utils/clouderrorcode/CloudOperationsErrorCode.java b/utils/src/main/java/org/zstack/utils/clouderrorcode/CloudOperationsErrorCode.java index a0f09d4f1e9..a550fb7d673 100644 --- a/utils/src/main/java/org/zstack/utils/clouderrorcode/CloudOperationsErrorCode.java +++ b/utils/src/main/java/org/zstack/utils/clouderrorcode/CloudOperationsErrorCode.java @@ -6274,6 +6274,8 @@ public class CloudOperationsErrorCode { public static final String ORG_ZSTACK_STORAGE_BACKUP_10133 = "ORG_ZSTACK_STORAGE_BACKUP_10133"; + public static final String ORG_ZSTACK_STORAGE_BACKUP_CANCEL_TIMEOUT = "ORG_ZSTACK_STORAGE_BACKUP_CANCEL_TIMEOUT"; + public static final String ORG_ZSTACK_COMPUTE_10000 = "ORG_ZSTACK_COMPUTE_10000"; public static final String ORG_ZSTACK_COMPUTE_10001 = "ORG_ZSTACK_COMPUTE_10001";