Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.zstack.header.zone.ZoneStateEvent;

abstract class AbstractZone implements Zone {
private static DatabaseFacade dbf = Platform.getComponentLoader().getComponent(DatabaseFacade.class);
private final static StateMachine<ZoneState, ZoneStateEvent> stateMachine;

static {
Expand Down
189 changes: 189 additions & 0 deletions core/src/main/java/org/zstack/core/thread/AbstractCoalesceQueue.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package org.zstack.core.thread;

import org.springframework.beans.factory.annotation.Autowire;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Configurable;
import org.zstack.header.core.AbstractCompletion;
import org.zstack.header.core.Completion;
import org.zstack.header.core.ReturnValueCompletion;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.utils.Utils;
import org.zstack.utils.logging.CLogger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

/**
* Base implementation for coalesce queues.
*
* @param <T> Request Item Type
* @param <R> Batch Execution Result Type
* @param <V> Single Request Result Type
*/
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
public abstract class AbstractCoalesceQueue<T, R, V> {
private static final CLogger logger = Utils.getLogger(AbstractCoalesceQueue.class);

@Autowired
private ThreadFacade thdf;

private final ConcurrentHashMap<String, SignatureQueue> signatureQueues = new ConcurrentHashMap<>();

/**
 * One caller's request waiting to be merged into a batch: the request item plus
 * the caller's completion callback (which may be {@code null}).
 */
protected class PendingRequest {
    final T item;
    final AbstractCompletion completion;

    PendingRequest(T item, AbstractCompletion completion) {
        this.item = item;
        this.completion = completion;
    }

    /**
     * Reports success to the caller. A {@link ReturnValueCompletion} receives
     * {@code result}; a plain {@link Completion} is signalled without a value.
     * Does nothing when no completion was supplied or its type is neither of the two.
     *
     * @param result the per-request result computed from the batch result
     */
    // The cast is unchecked because the completion is stored as the non-generic base type.
    @SuppressWarnings("unchecked")
    void notifySuccess(V result) {
        if (completion == null) {
            return;
        }

        if (completion instanceof ReturnValueCompletion) {
            ((ReturnValueCompletion<V>) completion).success(result);
        } else if (completion instanceof Completion) {
            ((Completion) completion).success();
        }
    }

    /**
     * Reports failure to the caller. Does nothing when no completion was supplied
     * or its type is neither {@link ReturnValueCompletion} nor {@link Completion}.
     *
     * @param errorCode the error to hand to the caller
     */
    void notifyFailure(ErrorCode errorCode) {
        if (completion == null) {
            return;
        }

        if (completion instanceof ReturnValueCompletion) {
            // fail(ErrorCode) does not involve the value type, so a wildcard cast
            // is enough and avoids an unchecked-cast warning.
            ((ReturnValueCompletion<?>) completion).fail(errorCode);
        } else if (completion instanceof Completion) {
            ((Completion) completion).fail(errorCode);
        }
    }
}

/**
 * Holds the requests accumulated for one sync signature until a chain task
 * drains them with {@link #takeAll()}.
 */
private class SignatureQueue {
    final String syncSignature;
    List<PendingRequest> pendingList = freshBuffer();

    SignatureQueue(String syncSignature) {
        this.syncSignature = syncSignature;
    }

    /** Creates an empty buffer of the same kind used for every drain cycle. */
    private List<PendingRequest> freshBuffer() {
        return Collections.synchronizedList(new ArrayList<>());
    }

    /**
     * Hands out everything accumulated so far and starts over with an empty buffer.
     *
     * @return the requests that were pending at the time of the call (possibly empty)
     */
    synchronized List<PendingRequest> takeAll() {
        final List<PendingRequest> drained = pendingList;
        pendingList = freshBuffer();
        return drained;
    }

    /** Appends a request to the current buffer. */
    synchronized void add(PendingRequest request) {
        pendingList.add(request);
    }

    /** @return {@code true} when no request is currently buffered */
    synchronized boolean isEmpty() {
        return pendingList.isEmpty();
    }
}

/**
 * Returns the name of this queue. It is used as a prefix in this class's log
 * messages and is embedded in the sync signature and name of the chain task
 * submitted for each request.
 */
protected abstract String getName();

/**
 * Executes one coalesced batch.
 * <p>
 * The completion is typed as the base {@link AbstractCompletion}; subclasses
 * cast it to the specific completion type they created in
 * {@link #createBatchCompletion}.
 *
 * @param items      the request items drained for this batch, in submission order
 * @param completion the object returned by {@link #createBatchCompletion} for this batch
 */
protected abstract void executeBatch(List<T> items, AbstractCompletion completion);

/**
 * Creates the completion handed to {@link #executeBatch} for one batch.
 *
 * @param syncSignature the signature whose requests were drained
 * @param requests      the drained requests that make up the batch
 * @param chain         the chain of the task running the batch
 * @return the batch completion
 */
protected abstract AbstractCompletion createBatchCompletion(String syncSignature, List<PendingRequest> requests, SyncTaskChain chain);

/**
 * Derives the result for a single request from the batch result. Called once
 * per request by {@link #handleSuccess}; if it throws, that request is notified
 * of failure instead of success.
 *
 * @param item        the request item
 * @param batchResult the result of the whole batch
 * @return the result to deliver to the request's completion
 */
protected abstract V calculateResult(T item, R batchResult);

/**
 * Completes a successfully executed batch: computes each request's own result
 * via {@link #calculateResult} and notifies its completion, then releases the
 * signature queue if it is empty and advances the chain.
 * <p>
 * A request whose result calculation or success callback throws is notified of
 * failure instead. A throwing callback never prevents the remaining requests
 * from being notified, nor the chain from advancing.
 *
 * @param syncSignature the signature the batch belongs to
 * @param requests      the requests that made up the batch
 * @param batchResult   the result produced by the batch execution
 * @param chain         the chain to advance once every request is notified
 */
protected final void handleSuccess(String syncSignature, List<PendingRequest> requests, R batchResult, SyncTaskChain chain) {
    for (PendingRequest req : requests) {
        try {
            V singleResult = calculateResult(req.item, batchResult);
            req.notifySuccess(singleResult);
        } catch (Throwable t) {
            logger.warn(String.format("[%s] failed to calculate result for item %s", getName(), req.item), t);
            try {
                req.notifyFailure(org.zstack.core.Platform.operr("failed to calculate result: %s", t.getMessage()));
            } catch (Throwable nested) {
                // Isolate a misbehaving callback: without this, the loop would abort,
                // later requests would never be notified and chain.next() would be skipped.
                logger.warn(String.format("[%s] failed to notify failure for item %s", getName(), req.item), nested);
            }
        }
    }
    cleanup(syncSignature);
    chain.next();
}

/**
 * Completes a failed batch: notifies every request with the same error, then
 * releases the signature queue if it is empty and advances the chain.
 * <p>
 * A throwing failure callback is logged and skipped, so it cannot prevent the
 * remaining requests from being notified or the chain from advancing.
 *
 * @param syncSignature the signature the batch belongs to
 * @param requests      the requests that made up the batch
 * @param errorCode     the error to report to every request
 * @param chain         the chain to advance once every request is notified
 */
protected final void handleFailure(String syncSignature, List<PendingRequest> requests, ErrorCode errorCode, SyncTaskChain chain) {
    for (PendingRequest req : requests) {
        try {
            req.notifyFailure(errorCode);
        } catch (Throwable t) {
            // Keep going: one bad callback must not leave the other requests pending.
            logger.warn(String.format("[%s] failed to notify failure for item %s", getName(), req.item), t);
        }
    }
    cleanup(syncSignature);
    chain.next();
}

/**
 * Replaces the {@link ThreadFacade} used to submit chain tasks.
 * Package-private; NOTE(review): presumably meant for tests that bypass
 * autowiring - confirm.
 *
 * @param thdf the thread facade to use from now on
 */
void setThreadFacade(ThreadFacade thdf) {
this.thdf = thdf;
}

/**
 * Wraps the item and its completion into a pending request and queues it under
 * the given signature.
 *
 * @param syncSignature requests sharing this signature are coalesced together
 * @param item          the request item
 * @param completion    the caller's callback for this single request
 */
protected final void submitRequest(String syncSignature, T item, AbstractCompletion completion) {
    final PendingRequest pending = new PendingRequest(item, completion);
    doSubmit(syncSignature, pending);
}

private void doSubmit(String syncSignature, PendingRequest request) {
SignatureQueue queue = signatureQueues.computeIfAbsent(syncSignature, SignatureQueue::new);
queue.add(request);

thdf.chainSubmit(new ChainTask(null) {
/** Builds the chain task's sync signature from the queue name and the request signature. */
@Override
public String getSyncSignature() {
    final String queueName = AbstractCoalesceQueue.this.getName();
    return String.format("coalesce-queue-%s-%s", queueName, syncSignature);
}

/**
 * Drains every request currently pending for the signature and executes them
 * as one batch. If an earlier task already drained the queue, there is nothing
 * to do and the chain simply advances.
 * <p>
 * If {@code executeBatch} throws synchronously, the batch is failed through its
 * completion so the drained requests are still notified and the chain advances.
 */
@Override
public void run(SyncTaskChain chain) {
    List<PendingRequest> requests = queue.takeAll();

    if (requests.isEmpty()) {
        chain.next();
        return;
    }

    // Qualify the call: an unqualified getName() here resolves to this task's own
    // getName() override, not to the queue's name used by the other log messages.
    String name = AbstractCoalesceQueue.this.getName();
    logger.debug(String.format("[%s] coalescing %d requests for signature[%s]",
            name, requests.size(), syncSignature));

    // Create the specific completion type (Completion or ReturnValueCompletion)
    AbstractCompletion batchCompletion = createBatchCompletion(syncSignature, requests, chain);

    // Execute batch with the direct completion object
    List<T> items = requests.stream().map(req -> req.item).collect(Collectors.toList());
    try {
        executeBatch(items, batchCompletion);
    } catch (Throwable t) {
        // The requests were already drained from the queue; if the batch dies here
        // without touching its completion, nobody would ever notify them.
        logger.warn(String.format("[%s] executeBatch failed for signature[%s]", name, syncSignature), t);
        ErrorCode err = org.zstack.core.Platform.operr("executeBatch failed: %s", t.getMessage());

        // Route the failure through the batch completion itself so it follows the
        // same path as an asynchronous failure.
        // NOTE(review): an executeBatch that completes the batch and then throws
        // would be reported twice - confirm implementations never do that.
        if (batchCompletion instanceof ReturnValueCompletion) {
            ((ReturnValueCompletion<?>) batchCompletion).fail(err);
        } else if (batchCompletion instanceof Completion) {
            ((Completion) batchCompletion).fail(err);
        } else {
            AbstractCoalesceQueue.this.handleFailure(syncSignature, requests, err, chain);
        }
    }
}
Comment on lines +132 to +162
Copy link

@coderabbitai coderabbitai bot Feb 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, find the AbstractCoalesceQueue file and understand its structure
find . -name "AbstractCoalesceQueue.java" -type f

Repository: MatheMatrix/zstack

Length of output: 132


🏁 Script executed:

# Look for the full AbstractCoalesceQueue class to understand executeBatch
cat core/src/main/java/org/zstack/core/thread/AbstractCoalesceQueue.java

Repository: MatheMatrix/zstack

Length of output: 6688


🏁 Script executed:

# Search for executeBatch method implementations and usages
rg "executeBatch" --type java -B 3 -A 8

Repository: MatheMatrix/zstack

Length of output: 15493


🏁 Script executed:

# Search for how exceptions are handled in ChainTask and chain completion patterns
rg "ChainTask" --type java -B 2 -A 10 | head -100

Repository: MatheMatrix/zstack

Length of output: 10940


🏁 Script executed:

# Look for batchCompletion and Completion related patterns
rg "class.*Completion.*extends" --type java | head -20

Repository: MatheMatrix/zstack

Length of output: 1670


🏁 Script executed:

# Find ThreadFacade and understand chainSubmit behavior
find . -name "ThreadFacade.java" -type f

Repository: MatheMatrix/zstack

Length of output: 123


🏁 Script executed:

# Check ThreadFacade implementation for exception handling
rg "class ThreadFacade" --type java -A 50 | head -80

Repository: MatheMatrix/zstack

Length of output: 5545


🏁 Script executed:

# Look for chainSubmit implementation
rg "chainSubmit" --type java -B 2 -A 15 | head -100

Repository: MatheMatrix/zstack

Length of output: 10714


🏁 Script executed:

# Check FlatDhcpBackend implementation more thoroughly
rg "class FlatDhcpBackend" --type java -A 100 | grep -A 30 "executeBatch"

Repository: MatheMatrix/zstack

Length of output: 4498


🏁 Script executed:

# Search for other production implementations of CoalesceQueue
fd "CoalesceQueue" --type f | grep -v test | grep -v ".git"

Repository: MatheMatrix/zstack

Length of output: 263


🏁 Script executed:

# Find ChainTask implementation to understand exception handling
find . -name "ChainTask.java" -type f

Repository: MatheMatrix/zstack

Length of output: 120


🏁 Script executed:

# Look at ChainTask implementation and how it handles exceptions
cat core/src/main/java/org/zstack/core/thread/ChainTask.java

Repository: MatheMatrix/zstack

Length of output: 1309


🏁 Script executed:

# Check if there's any documentation or contract about sync exceptions in executeBatch
rg "executeBatch" --type java -B 5 -A 3 | grep -A 5 -B 5 "throws\|throws\|synchronous\|async"

Repository: MatheMatrix/zstack

Length of output: 44


🏁 Script executed:

# Check test file to see what exception scenarios are tested
head -300 test/src/test/java/org/zstack/test/core/thread/TestCoalesceQueue.java

Repository: MatheMatrix/zstack

Length of output: 11639


🏁 Script executed:

# Check ThreadFacadeImpl for how it executes ChainTask and handles exceptions
cat core/src/main/java/org/zstack/core/thread/ThreadFacadeImpl.java | head -500

Repository: MatheMatrix/zstack

Length of output: 13298


🏁 Script executed:

# Search for exception handling in ChainTask execution context
rg "run\(SyncTaskChain" --type java -A 20 | head -150

Repository: MatheMatrix/zstack

Length of output: 14184


🏁 Script executed:

# Check if there's a wrapper or exception handler in the queue processing logic
rg "class.*TaskChain\|interface.*TaskChain" --type java

Repository: MatheMatrix/zstack

Length of output: 44


🏁 Script executed:

# Find DispatchQueue implementation and see if it wraps exception handling
find . -name "DispatchQueue*.java" -type f | grep -v test

Repository: MatheMatrix/zstack

Length of output: 191


🏁 Script executed:

# Check the DispatchQueue chainSubmit implementation
rg "chainSubmit" --type java -B 5 -A 30 | grep -A 30 "public.*chainSubmit\|private.*chainSubmit"

Repository: MatheMatrix/zstack

Length of output: 6101


🏁 Script executed:

# Look for SyncTaskChain interface and implementation
find . -name "SyncTaskChain.java" -type f

Repository: MatheMatrix/zstack

Length of output: 124


🏁 Script executed:

# Get the full DispatchQueueImpl to see the doChainSyncSubmit implementation
cat core/src/main/java/org/zstack/core/thread/DispatchQueueImpl.java

Repository: MatheMatrix/zstack

Length of output: 21663


🏁 Script executed:

# Also check SyncTaskChain interface
cat core/src/main/java/org/zstack/core/thread/SyncTaskChain.java

Repository: MatheMatrix/zstack

Length of output: 210


🏁 Script executed:

# Check how batchCompletion is used and ensure it's always called
rg "batchCompletion" --type java -B 3 -A 3 core/src/main/java/org/zstack/core/thread/

Repository: MatheMatrix/zstack

Length of output: 2913


🏁 Script executed:

# Verify the test case for exception handling in executeBatch
grep -A 20 "testResultCalculationFailure" test/src/test/java/org/zstack/test/core/thread/TestCoalesceQueue.java

Repository: MatheMatrix/zstack

Length of output: 1019


executeBatch 同步异常会导致批次请求永久挂起

如果 executeBatch 同步抛异常,当前批次的 completion 回调不会被触发,pending 请求永远不会收到成功或失败通知,导致请求长期挂起且队列不会清理。虽然 chain.next() 最终会被框架的 ChainFuture 异常处理机制调用,但 batchCompletion 的成功/失败路径被跳过,cleanup() 也无法执行。建议使用 try-catch 包装 executeBatch,在异常时调用 batchCompletion.fail() 确保统一的失败处理路径:

建议修复
                // Execute batch with the direct completion object
                List<T> items = requests.stream().map(req -> req.item).collect(Collectors.toList());
-               executeBatch(items, batchCompletion);
+               try {
+                   executeBatch(items, batchCompletion);
+               } catch (Throwable t) {
+                   logger.warn(String.format("[%s] executeBatch failed for signature[%s]", name, syncSignature), t);
+                   batchCompletion.fail(org.zstack.core.Platform.operr("executeBatch failed: %s", t.getMessage()));
+               }
🤖 Prompt for AI Agents
In `@core/src/main/java/org/zstack/core/thread/AbstractCoalesceQueue.java` around
lines 132 - 162, In doSubmit, executing executeBatch(...) can throw
synchronously and skip the batch completion/cleanup; wrap the call to
executeBatch(items, batchCompletion) in a try-catch around the executeBatch
invocation (inside the ChainTask.run), and on any Throwable call the created
batch completion's failure path (e.g. batchCompletion.fail(err)) then ensure
chain.next() is still invoked (or rethrow to let chain handle continuation) so
pending requests from queue.takeAll() are always resolved; refer to
methods/variables: doSubmit, ChainTask.run, queue.takeAll,
createBatchCompletion, executeBatch, and batchCompletion.fail() when making the
change.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment from jin.ma:

为啥会这样呢,我跑了 UT 你说的确实是对的,不应该有 completion aspectj 来确保吗

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for AspectJ completion aspects in the codebase
rg "aspect|Aspect" --type java -i | grep -i completion | head -20

# Search for AspectJ configuration and completion interceptors
fd "aspect" --type f | grep -E "\.(java|xml)$" | head -10

# Look for completion aspect implementation
rg "@Aspect|@Around.*[Cc]ompletion" --type java -B 2 -A 5 | head -50

Length of output: 73


🏁 Script executed:

#!/bin/bash
# Check the aspect file found
cat core/src/main/java/org/zstack/core/aspect/OwnedByAccountAspectHelper.java

# Search for completion-related aspects more broadly
rg "class.*Aspect" --type java -A 10 | grep -B 5 -A 10 -i completion

# Look for any aspect that might intercept completion methods
rg "pointcut.*completion|execution.*completion" --type java -i -B 3 -A 3

# Search for completion callback handling mechanisms
rg "class.*Completion.*Aspect|CompletionAspect" --type java -B 5 -A 20

Length of output: 1006


🏁 Script executed:

#!/bin/bash
# Search for any aspect definitions in the codebase
find . -name "*Aspect*.java" -type f | grep -v test | head -20

# Look for completion callback guarantee mechanisms
rg "guarantee.*completion|ensure.*completion|completion.*guarantee" --type java -i -B 2 -A 2

# Search for AbstractCompletion and how it's typically used
rg "class AbstractCompletion|interface AbstractCompletion" --type java -A 30 | head -80

# Look for any interceptor or wrapper around completion
rg "intercept.*[Cc]ompletion|wrap.*[Cc]ompletion" --type java -B 3 -A 3

Length of output: 3103


🏁 Script executed:

#!/bin/bash
# Look at the full AbstractCompletion to understand the mechanism
cat header/src/main/java/org/zstack/header/core/AbstractCompletion.java

# Search for any aspect configuration files
find . -name "*.aj" -type f | head -10

# Look for Spring AOP or AspectJ configuration
rg "@Aspect" --type java -B 2 -A 20 | head -100

# Check if there's any completion validation or assertion mechanism
rg "successCalled|failCalled" --type java -B 2 -A 5 | head -50

Length of output: 6349


🏁 Script executed:

#!/bin/bash
# Look at the CompletionSingleCallAspect to understand what it does
cat core/src/main/java/org/zstack/core/aspect/CompletionSingleCallAspect.aj

# Also check AsyncSafeAspect which might be relevant
cat core/src/main/java/org/zstack/core/aspect/AsyncSafeAspect.aj

# Look at ExceptionSafeAspect which might handle exceptions
cat core/src/main/java/org/zstack/core/aspect/ExceptionSafeAspect.aj

Length of output: 6555


@ZStack-Robot

你提出了一个很好的问题!确实代码库中有 AspectJ completion aspect,但它在这个场景下不起作用,原因如下:

技术原因

我找到了相关的 AspectJ aspect 实现:

  1. AsyncSafeAspect.aj - 它的切点定义是:
pointcut asyncSafe1() : execution(* *.*(.., Completion, ..));
pointcut asyncSafe2() : execution(* *.*(.., NoErrorCompletion, ..));
pointcut asyncSafe3() : execution(* *.*(.., ReturnValueCompletion, ..));
  1. 这个 aspect 会拦截所有参数中包含 Completion、NoErrorCompletion 或 ReturnValueCompletion 类型的方法,如果方法抛异常,会自动调用 completion.fail()

问题所在

关键问题: executeBatch 的方法签名使用的是 AbstractCompletion 而不是具体的 Completion 或 ReturnValueCompletion:

protected abstract void executeBatch(List<T> items, AbstractCompletion completion);

AspectJ 的 pointcut 是基于方法签名中声明的参数类型进行匹配的,而不是运行时类型。由于 AbstractCompletion 是基类,不是 Completion 或 ReturnValueCompletion,所以 AsyncSafeAspect 的切点不会匹配这个方法,因此不会提供异常保护。

解决方案

有几种方式可以解决:

  1. 在调用处添加 try-catch(推荐):就像我建议的那样,在 ChainTask.run() 中包装 executeBatch 调用

  2. 修改 executeBatch 签名:让子类使用具体的 Completion 类型,但这会破坏当前的抽象设计

  3. 扩展 AspectJ 切点:在 AsyncSafeAspect 中添加 AbstractCompletion 的切点支持,但这可能影响其他使用 AbstractCompletion 的代码

从架构角度看,方案1最安全且不影响现有代码,建议采用。


🧠 Learnings used
Learnt from: zstack-robot-2
Repo: MatheMatrix/zstack PR: 2419
File: plugin/sdnController/src/main/java/org/zstack/sdnController/h3cVcfc/H3cVcfcV2SdnController.java:515-537
Timestamp: 2025-08-13T02:39:40.677Z
Learning: 在ZStack项目中,方法捕获异常后既记录日志又抛出新的RuntimeException是可接受的异常处理模式,不需要修改这种实现方式。

Learnt from: zstack-robot-1
Repo: MatheMatrix/zstack PR: 2360
File: network/src/main/java/org/zstack/network/l3/L3BasicNetwork.java:449-490
Timestamp: 2025-08-04T04:48:19.103Z
Learning: ZStack项目在cherry-pick操作中,即使发现了性能优化机会(如IP地址批量保存的内存优化),也严格遵循不做额外修改的政策,优先保证cherry-pick的完整性和一致性。

Learnt from: ZStack-Robot
Repo: MatheMatrix/zstack PR: 2489
File: storage/src/main/java/org/zstack/storage/snapshot/VolumeTree.java:471-545
Timestamp: 2025-08-22T05:36:40.467Z
Learning: In ZStack项目中,对于核心逻辑相关的代码,团队倾向于暂时不进行重构,即使代码复杂度较高,优先保证核心功能的稳定性。

Learnt from: ZStack-Robot
Repo: MatheMatrix/zstack PR: 2325
File: compute/src/main/java/org/zstack/compute/vm/VmMigrateCallExtensionFlow.java:29-29
Timestamp: 2025-07-24T05:53:10.246Z
Learning: 在ZStack项目的重构过程中,团队采用务实的优先级策略:优先完成影响运行时功能的代码重构,对于注释中的旧引用等非功能性问题可以延后处理,因为这些不会影响系统的实际运行。

Learnt from: zstack-robot-1
Repo: MatheMatrix/zstack PR: 2360
File: compute/src/main/java/org/zstack/compute/vm/StaticIpOperator.java:364-501
Timestamp: 2025-08-04T03:19:07.454Z
Learning: In ZStack project, developers may prefer to keep related validation logic together in a single method (like validateStaticIpTagsInApiMessage with 130+ lines) when the architecture is clear and there's no code reuse, rather than splitting into smaller methods based purely on single responsibility principle.

Learnt from: ZStack-Robot
Repo: MatheMatrix/zstack PR: 2419
File: plugin/sdnController/src/main/java/org/zstack/sdnController/SdnControllerBase.java:986-1067
Timestamp: 2025-08-13T02:31:40.023Z
Learning: 在ZStack项目中,团队倾向于保持统一的业务流程方法不进行拆分,即使方法较长也会维持在单个方法中以保持业务逻辑的完整性。

Learnt from: zstack-robot-2
Repo: MatheMatrix/zstack PR: 2496
File: plugin/sharedMountPointPrimaryStorage/src/main/java/org/zstack/storage/primary/smp/KvmBackend.java:2545-2566
Timestamp: 2025-08-24T07:35:42.793Z
Learning: 在 ZStack 代码库中,当响应类包含数值字段(如 size、actualSize)时,优先使用原始类型(long)而不是包装类型(Long),以避免 NPE 风险和不必要的装箱/拆箱操作。如果 Agent 端可能不设置该字段,应在 Agent 端确保设置默认值,而不是在使用端做 null 检查。

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment from jin.ma:

但是 protected abstract void executeBatch(List items, ReturnValueCompletion completion); protected abstract void executeBatch(List items, Completion completion); 这两个方法的签名是有 Completion 的啊

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment from jin.ma:

@coderabbitai 但是 protected abstract void executeBatch(List items, ReturnValueCompletion completion); protected abstract void executeBatch(List items, Completion completion); 这两个方法的签名是有 Completion 的啊,再解释一下为什么没有生效

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

@MatheMatrix 你说得对,我需要重新分析这个问题。让我深入检查一下实际的代码流程。


🏁 Script executed:

#!/bin/bash
# Find the actual implementation of executeBatch in FlatDhcpBackend
rg "class DhcpApplyQueue" --type java -A 30 plugin/flatNetworkProvider/src/main/java/org/zstack/network/service/flat/FlatDhcpBackend.java

Length of output: 1184


🏁 Script executed:

#!/bin/bash
# Check the exact method signature in the DhcpApplyQueue implementation
rg "executeBatch.*items.*completion" --type java -B 2 -A 10 plugin/flatNetworkProvider/src/main/java/org/zstack/network/service/flat/FlatDhcpBackend.java

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at the CoalesceQueue bridge method pattern more carefully
cat core/src/main/java/org/zstack/core/thread/CoalesceQueue.java

Length of output: 1937


🏁 Script executed:

#!/bin/bash
# Check if there are any test logs or evidence of aspect weaving
rg "@annotation.*AsyncSafe|AsyncSafe.*aspectOf" --type java -B 2 -A 5

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at the full AsyncSafeAspect implementation again more carefully
cat core/src/main/java/org/zstack/core/aspect/AsyncSafeAspect.aj

Length of output: 4011


🏁 Script executed:

#!/bin/bash
# Check if there are actual test cases that verify AsyncSafeAspect works
rg "AsyncSafe" --type java test/ -B 3 -A 10 | head -100

Length of output: 5257


🏁 Script executed:

#!/bin/bash
# Search for any evidence of aspect weaving issues or limitations with bridge methods
rg "bridge.*method|synthetic.*method" --type java -i | grep -i aspect | head -20

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check the test case to see if exceptions in executeBatch are actually caught
grep -B 5 -A 30 "testBatchFailure\|testResultCalculationFailure" test/src/test/java/org/zstack/test/core/thread/TestCoalesceQueue.java

Length of output: 2523


🏁 Script executed:

#!/bin/bash
# Look for any test that actually throws an exception in executeBatch
rg "throw.*Exception|throw new" test/src/test/java/org/zstack/test/core/thread/TestCoalesceQueue.java -B 3 -A 3

Length of output: 2606


🏁 Script executed:

#!/bin/bash
# Check if there are any integration test logs showing aspect interception
rg "executeBatch.*throw|throw.*executeBatch" --type java -B 2 -A 5 | head -50

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at one of the AsyncSafe test files to understand how it's tested
cat test/src/test/java/org/zstack/test/aop/TestAsyncSafe1.java

Length of output: 1399


🏁 Script executed:

#!/bin/bash
# Check the Groovy test case to see if it tests exception handling in executeBatch
rg "throw|exception" test/src/test/groovy/org/zstack/test/integration/core/chaintask/CoalesceQueueCase.groovy -i -B 2 -A 5 | head -100

Length of output: 1932


🏁 Script executed:

#!/bin/bash
# Get the full testBatchFailureNotifiesAllRequests test from Groovy file
grep -B 10 -A 40 "testBatchFailureNotifiesAllRequests" test/src/test/groovy/org/zstack/test/integration/core/chaintask/CoalesceQueueCase.groovy

Length of output: 3492


🏁 Script executed:

#!/bin/bash
# Also check if this Groovy test file is actually being run in CI
find . -name "*.groovy" -path "*/test/*" -exec grep -l "CoalesceQueueCase" {} \;

Length of output: 90


🏁 Script executed:

#!/bin/bash
# Check if there's any test suite that includes this test
rg "CoalesceQueueCase" --type groovy

Length of output: 130


🏁 Script executed:

#!/bin/bash
# Let's also look at the build configuration to see if AspectJ weaving is enabled
find . -name "pom.xml" -exec grep -l "aspectj" {} \; | head -5

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check if AspectJ is configured in the build system
find . -name "pom.xml" -type f | xargs grep -l "aspectj" 2>/dev/null | head -10

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at the core module's pom.xml to see AspectJ configuration
cat core/pom.xml | grep -A 10 -B 5 "aspectj"

Length of output: 44


🏁 Script executed:

#!/bin/bash
# Also check if there's aspectj-maven-plugin configuration
rg "aspectj-maven-plugin|aspectjrt|aspectjweaver" pom.xml --type xml -A 5

Length of output: 47


🏁 Script executed:

#!/bin/bash
# Let's verify the test suite structure - check if CoalesceQueueCase is included
find test -name "*Suite*.groovy" -type f | xargs grep -l "CoalesceQueue" 2>/dev/null

Length of output: 0


@MatheMatrix 你说得对,理论上 AspectJ 应该能拦截。但既然你实际跑 UT 确认了问题存在,让我分析一下可能的原因:

可能的技术原因

1. 桥接方法的拦截时机问题

调用链是这样的:

AbstractCoalesceQueue.doSubmit()
  → executeBatch(items, AbstractCompletion batchCompletion)  // 调用点
    → CoalesceQueue.executeBatch(List<T>, AbstractCompletion)  // 桥接方法,签名不匹配 pointcut
      → (Completion) batchCompletion  // 类型转换
        → DhcpApplyQueue.executeBatch(List, Completion)  // 实现方法,签名匹配 pointcut

问题可能在于:如果异常在类型转换这一行抛出,或者在桥接方法内部抛出(而不是实际的实现方法内部),那这个异常就不在 AsyncSafeAspect 的拦截范围内,因为桥接方法的签名是 executeBatch(List, AbstractCompletion),不匹配 execution(* *.*(.., Completion, ..))

2. 测试环境的 AspectJ 编织可能不完整

可能在测试环境中 AspectJ 的编织配置不完整,导致某些场景下拦截失败。

建议的解决方案

最稳妥的做法是添加显式的 try-catch,原因是:

  1. 防御性编程: 即使 AspectJ 理论上应该生效,显式的异常处理更可靠且易于理解
  2. 清晰的职责: 在 ChainTask 层面统一处理批次失败,确保 cleanup 和 chain.next() 总是被调用
  3. 避免依赖 AspectJ 的隐式行为: AspectJ 的行为可能因配置、编织时机等因素而变化

建议的修改保持不变:

try {
    executeBatch(items, batchCompletion);
} catch (Throwable t) {
    logger.warn(String.format("[%s] executeBatch failed for signature[%s]", name, syncSignature), t);
    batchCompletion.fail(org.zstack.core.Platform.operr("executeBatch failed: %s", t.getMessage()));
}

这样可以:

  • 确保所有异常场景都被正确处理
  • 不依赖 AspectJ 的隐式拦截
  • 代码更易维护和理解

你觉得这个分析合理吗?


/** Names the chain task after the owning queue and the request signature. */
@Override
public String getName() {
    final String queueName = AbstractCoalesceQueue.this.getName();
    return String.format("%s-coalesced-batch-%s", queueName, syncSignature);
}

/**
 * Sync level of the batch task; always 1.
 * NOTE(review): presumably this limits tasks sharing a sync signature to one
 * running at a time - confirm against ChainTask / the dispatch queue.
 */
@Override
protected int getSyncLevel() {
return 1;
}
});
}

/**
 * Drops the signature's queue from the map once it holds no pending request;
 * a queue that still has requests is left in place.
 *
 * @param syncSignature the signature whose queue may be released
 */
private void cleanup(String syncSignature) {
    // Returning null from the remapping function removes the entry.
    signatureQueues.computeIfPresent(syncSignature,
            (signature, pending) -> pending.isEmpty() ? null : pending);
}

/**
 * For testing: returns how many signature queues are currently registered,
 * i.e. have been created by a submit and not yet removed by cleanup.
 */
int getActiveQueueCount() {
return signatureQueues.size();
}
}
61 changes: 61 additions & 0 deletions core/src/main/java/org/zstack/core/thread/CoalesceQueue.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package org.zstack.core.thread;

import org.zstack.header.core.AbstractCompletion;
import org.zstack.header.core.Completion;
import org.zstack.header.errorcode.ErrorCode;

import java.util.List;

/**
 * Coalesce queue variant for requests whose callers only need to know whether
 * the batch succeeded, not a per-request value.
 *
 * @param <T> Request Item Type
 */
public abstract class CoalesceQueue<T> extends AbstractCoalesceQueue<T, Void, Void> {

    /**
     * Queues a request; requests sharing a sync signature are merged into one batch.
     *
     * @param syncSignature the sync signature used to group requests
     * @param item the request item
     * @param completion the callback notified when the batch containing this item finishes
     */
    public void submit(String syncSignature, T item, Completion completion) {
        submitRequest(syncSignature, item, completion);
    }

    /**
     * Processes one batch of coalesced items. Implemented by subclasses.
     *
     * @param items the coalesced request items
     * @param completion the callback to signal when the batch has succeeded or failed
     */
    protected abstract void executeBatch(List<T> items, Completion completion);

    /** There is no per-request value for this queue type, so the result is always {@code null}. */
    @Override
    protected final Void calculateResult(T item, Void batchResult) {
        return null;
    }

    /** Bridges the base-class hook to the {@link Completion}-typed variant. */
    @Override
    protected final void executeBatch(List<T> items, AbstractCompletion batchCompletion) {
        final Completion typed = (Completion) batchCompletion;
        executeBatch(items, typed);
    }

    /**
     * Builds the batch completion, which forwards the batch outcome to
     * {@code handleSuccess} / {@code handleFailure} for all requests of the batch.
     */
    @Override
    protected final AbstractCompletion createBatchCompletion(String syncSignature, List<PendingRequest> requests, SyncTaskChain chain) {
        final Completion forwarder = new Completion(chain) {
            @Override
            public void success() {
                handleSuccess(syncSignature, requests, null, chain);
            }

            @Override
            public void fail(ErrorCode errorCode) {
                handleFailure(syncSignature, requests, errorCode, chain);
            }
        };
        return forwarder;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.zstack.core.thread;

import org.zstack.header.core.AbstractCompletion;
import org.zstack.header.core.ReturnValueCompletion;
import org.zstack.header.errorcode.ErrorCode;

import java.util.List;

/**
 * A coalesce queue for requests that expect a return value.
 *
 * @param <T> Request Item Type
 * @param <R> Batch Execution Result Type
 * @param <V> Single Request Result Type
 */
public abstract class ReturnValueCoalesceQueue<T, R, V> extends AbstractCoalesceQueue<T, R, V> {

    /**
     * Submit a request.
     *
     * @param syncSignature the sync signature; requests with the same signature will be coalesced
     * @param item the request item
     * @param completion the completion callback receiving this request's own result
     */
    public void submit(String syncSignature, T item, ReturnValueCompletion<V> completion) {
        submitRequest(syncSignature, item, completion);
    }

    /**
     * Executes the batched requests.
     * <p>
     * Subclasses must implement this method to process the coalesced items and
     * report the batch result through {@code completion}.
     *
     * @param items the list of coalesced request items
     * @param completion the completion callback for the batch execution
     */
    protected abstract void executeBatch(List<T> items, ReturnValueCompletion<R> completion);

    /** Bridges the base-class hook to the {@link ReturnValueCompletion}-typed variant. */
    // Unchecked: the completion travels as the non-generic base type; it is the
    // ReturnValueCompletion<R> created by createBatchCompletion below.
    @Override
    @SuppressWarnings("unchecked")
    protected final void executeBatch(List<T> items, AbstractCompletion batchCompletion) {
        executeBatch(items, (ReturnValueCompletion<R>) batchCompletion);
    }

    /**
     * Builds the batch completion, which forwards the batch outcome to
     * {@code handleSuccess} / {@code handleFailure} for all requests of the batch.
     */
    @Override
    protected final AbstractCompletion createBatchCompletion(String syncSignature, List<PendingRequest> requests, SyncTaskChain chain) {
        // Pass the chain as the completion's backup, consistent with CoalesceQueue,
        // instead of null.
        return new ReturnValueCompletion<R>(chain) {
            @Override
            public void success(R batchResult) {
                handleSuccess(syncSignature, requests, batchResult, chain);
            }

            @Override
            public void fail(ErrorCode errorCode) {
                handleFailure(syncSignature, requests, errorCode, chain);
            }
        };
    }
}
Loading