@@ -94,7 +94,6 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
9494 uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1;
9595 GPUTPCGMMerger& Merger = processors()->tpcMerger;
9696 GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger;
97- GPUTPCGMMerger& MergerShadowAll = doGPU ? processorsShadow()->tpcMerger : Merger;
9897 const int32_t outputStream = OutputStream();
9998 if (GetProcessingSettings().debugLevel >= 2) {
10099 GPUInfo("Running TPC Merger");
@@ -139,28 +138,28 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
139138 DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile);
140139
141140 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
142- runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter(), NSECTORS * sizeof(*MergerShadowAll .TmpCounter()));
141+ runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter(), NSECTORS * sizeof(*MergerShadow .TmpCounter()));
143142 runKernel<GPUTPCGMMergerMergeWithinPrepare>(GetGridAuto(0, deviceType));
144143 RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType);
145144 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
146145 DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile);
147146
148147 runKernel<GPUTPCGMMergerClearLinks>(GetGridAuto(0, deviceType), false);
149- runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll .TmpCounter()));
148+ runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow .TmpCounter()));
150149 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0);
151150 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
152151 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
153- runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll .TmpCounter()));
152+ runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow .TmpCounter()));
154153 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0);
155154 RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType);
156155 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
157- runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll .TmpCounter()));
156+ runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow .TmpCounter()));
158157 runKernel<GPUTPCGMMergerMergeSectorsPrepare>(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1);
159158 RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType);
160159 RunTPCTrackingMerger_Resolve(0, 1, deviceType);
161160 DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile);
162161
163- runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll .TmpCounter()));
162+ runKernel<GPUMemClean16>({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadow .TmpCounter()));
164163
165164 runKernel<GPUTPCGMMergerLinkExtrapolatedTracks>(GetGridAuto(0, deviceType));
166165 if (GetProcessingSettings().mergerSanityCheck) {
@@ -200,8 +199,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
200199 if (maxId > Merger.NMaxClusters()) {
201200 throw std::runtime_error("mNMaxClusters too small");
202201 }
203- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .SharedCount(), maxId * sizeof(*MergerShadowAll .SharedCount()));
204- runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll .ClusterAttachment(), maxId * sizeof(*MergerShadowAll .ClusterAttachment()));
202+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .SharedCount(), maxId * sizeof(*MergerShadow .SharedCount()));
203+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow .ClusterAttachment(), maxId * sizeof(*MergerShadow .ClusterAttachment()));
205204 runKernel<GPUTPCGMMergerPrepareForFit, 0>(GetGridAuto(0, deviceType));
206205 CondWaitEvent(waitForTransfer, &mEvents->single);
207206 runKernel<GPUTPCGMMergerSortTracksQPt>(GetGridAuto(0, deviceType));
@@ -226,6 +225,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
226225 mOutputQueue.clear();
227226 }
228227
228+ if (param().rec.tpc.rebuildTrackInFit) {
229+ runKernel<GPUMemClean16>({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadow.ClusterCandidates(), Merger.NMergedTracks() * GPUCA_ROW_COUNT * param().rec.tpc.rebuildTrackInFitClusterCandidates * sizeof(*MergerShadow.ClusterCandidates()));
230+ }
229231 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 0);
230232 if (param().rec.tpc.rebuildTrackInFit) {
231233 runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 1);
@@ -260,13 +262,13 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
260262 throw std::runtime_error("QA Scratch buffer exceeded");
261263 }
262264 }
263- GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll .MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
265+ GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadow .MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent);
264266 waitEvent = nullptr;
265267 if (param().dodEdxEnabled) {
266- GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll .MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
268+ GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadow .MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
267269 }
268- GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll .Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
269- GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll .ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
270+ GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadow .Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
271+ GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadow .ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
270272 }
271273 if (GetProcessingSettings().outputSharedClusterMap) {
272274 TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputState(), outputStream, nullptr, waitEvent);
0 commit comments