From 08287a86e1fc96cf17c8df98ce42db3de24feede Mon Sep 17 00:00:00 2001 From: FlyAntNotDown <461425614@qq.com> Date: Thu, 25 Jun 2026 23:50:59 +0800 Subject: [PATCH 1/7] fix: honor buffer map offset/length and correct vulkan host-access flags VulkanBuffer::Map applied no offset to the returned pointer and never flushed/invalidated non-coherent memory; it now returns base+offset, invalidates on a read map and flushes on unmap of a write map. Buffer allocation keyed host-access flags off `usages | mapWrite`, which is always true and forced every buffer into host-visible memory; it now uses `& mapWrite` for sequential-write and `& mapRead` for random-access host memory. DX12Buffer::Map likewise returned the resource base without adding the offset; it now returns base+offset to match the contract. --- Engine/Source/RHI-DirectX12/Src/Buffer.cpp | 2 +- .../RHI-Vulkan/Include/RHI/Vulkan/Buffer.h | 3 +++ Engine/Source/RHI-Vulkan/Src/Buffer.cpp | 21 ++++++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp index 4d25b595..93b0b399 100644 --- a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp @@ -80,7 +80,7 @@ namespace RHI::DirectX12 { void* data; const CD3DX12_RANGE range(inOffset, inOffset + inLength); Assert(SUCCEEDED(nativeResource->Map(0, &range, &data))); - return data; + return static_cast(data) + inOffset; } void DX12Buffer::Unmap() diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h index f3586f4e..8a679008 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h @@ -34,5 +34,8 @@ namespace RHI::Vulkan { VkBuffer nativeBuffer; VmaAllocation nativeAllocation; BufferUsageFlags usages; + MapMode mapMode; + size_t mapOffset; + size_t mapLength; }; } diff --git 
a/Engine/Source/RHI-Vulkan/Src/Buffer.cpp b/Engine/Source/RHI-Vulkan/Src/Buffer.cpp index eba554e0..20391e49 100644 --- a/Engine/Source/RHI-Vulkan/Src/Buffer.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Buffer.cpp @@ -16,6 +16,9 @@ namespace RHI::Vulkan { : Buffer(inCreateInfo) , device(inDevice) , usages(inCreateInfo.usages) + , mapMode(MapMode::read) + , mapOffset(0) + , mapLength(0) { CreateNativeBuffer(inCreateInfo); TransitionToInitState(inCreateInfo); @@ -28,15 +31,25 @@ namespace RHI::Vulkan { } } - void* VulkanBuffer::Map(MapMode inMapMode, size_t inOffset, size_t inLength) + void* VulkanBuffer::Map(const MapMode inMapMode, const size_t inOffset, const size_t inLength) { + mapMode = inMapMode; + mapOffset = inOffset; + mapLength = inLength; + void* data; Assert(vmaMapMemory(device.GetNativeAllocator(), nativeAllocation, &data) == VK_SUCCESS); - return data; + if (inMapMode == MapMode::read) { + Assert(vmaInvalidateAllocation(device.GetNativeAllocator(), nativeAllocation, inOffset, inLength) == VK_SUCCESS); + } + return static_cast(data) + inOffset; } void VulkanBuffer::Unmap() { + if (mapMode == MapMode::write) { + Assert(vmaFlushAllocation(device.GetNativeAllocator(), nativeAllocation, mapOffset, mapLength) == VK_SUCCESS); + } vmaUnmapMemory(device.GetNativeAllocator(), nativeAllocation); } @@ -55,8 +68,10 @@ namespace RHI::Vulkan { VmaAllocationCreateInfo allocInfo = {}; allocInfo.usage = VMA_MEMORY_USAGE_AUTO; - if (inCreateInfo.usages | BufferUsageBits::mapWrite) { + if (inCreateInfo.usages & BufferUsageBits::mapWrite) { allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + } else if (inCreateInfo.usages & BufferUsageBits::mapRead) { + allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; } Assert(vmaCreateBuffer(device.GetNativeAllocator(), &bufferInfo, &allocInfo, &nativeBuffer, &nativeAllocation, nullptr) == VK_SUCCESS); From eca06873e4d7d754bc058f4be479ed5e9500c215 Mon Sep 17 00:00:00 2001 From: FlyAntNotDown 
<461425614@qq.com>
Date: Fri, 26 Jun 2026 09:03:29 +0800
Subject: [PATCH 2/7] fix: correct buffer-texture copy footprints for sub-region and mips

The buffer<->texture copy paths only produced correct results when copying a whole sub-resource at the base mip; sub-region and multi-mip/3D copies were wrong:

- Vulkan: bufferRowLength/bufferImageHeight are measured in texels, but the helper assigned byte pitches; drive them from the sub-resource footprint.
- DirectX12: the staging footprint used copyRegion for its extent, making the slice stride diverge from the documented footprint; use the full extent and let the copy box select the window.
- GetTextureSubResourceCopyFootprint ignored the mip level on Vulkan (always base extent) and mis-indexed array layers on DirectX12; both now report the per-mip, per-layer footprint (with 3D depth halved per mip).
- Texture asset upload: UpdateMips under-allocated 3D sub-resources (missing depth) and the staging fill/copy used base width/height/depth for every mip; both are now driven by the per-mip footprint, fixing an out-of-bounds write for array textures.

--- .../RHI-DirectX12/Src/CommandRecorder.cpp | 8 +++++--- Engine/Source/RHI-DirectX12/Src/Device.cpp | 3 ++- .../Source/RHI-Vulkan/Src/CommandRecorder.cpp | 12 +++++------- Engine/Source/RHI-Vulkan/Src/Device.cpp | 10 ++++++++-- Engine/Source/Runtime/Src/Asset/Texture.cpp | 18 +++++++++--------- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp index f60285cf..7faf502b 100644 --- a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp @@ -48,12 +48,14 @@ namespace RHI::DirectX12 { { const auto aspectLayout = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT + // The buffer is laid out as the full sub-resource footprint (so the slice stride is RowPitch * full height); + // the copied window is selected by the box passed to CopyTextureRegion, not by shrinking this footprint. 
D3D12_PLACED_SUBRESOURCE_FOOTPRINT bufferLayout; bufferLayout.Offset = copyInfo.bufferOffset; bufferLayout.Footprint.Format = texture.GetNative()->GetDesc().Format; - bufferLayout.Footprint.Width = copyInfo.copyRegion.x; - bufferLayout.Footprint.Height = copyInfo.copyRegion.y; - bufferLayout.Footprint.Depth = copyInfo.copyRegion.z; + bufferLayout.Footprint.Width = aspectLayout.extent.x; + bufferLayout.Footprint.Height = aspectLayout.extent.y; + bufferLayout.Footprint.Depth = aspectLayout.extent.z; bufferLayout.Footprint.RowPitch = aspectLayout.rowPitch; return { buffer.GetNative(), bufferLayout }; } diff --git a/Engine/Source/RHI-DirectX12/Src/Device.cpp b/Engine/Source/RHI-DirectX12/Src/Device.cpp index efe625f7..7d6e4478 100644 --- a/Engine/Source/RHI-DirectX12/Src/Device.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Device.cpp @@ -267,7 +267,8 @@ namespace RHI::DirectX12 { const auto createInfo = texture.GetCreateInfo(); const auto nativeResourceDesc = dx12Texture.GetNative()->GetDesc(); - const size_t nativeSubResourceIndex = D3D12CalcSubresource(subResourceInfo.mipLevel, subResourceInfo.arrayLayer, 0, 1, 1); + const auto arraySize = createInfo.dimension == TextureDimension::t3D ? 
1 : createInfo.depthOrArraySize; + const size_t nativeSubResourceIndex = D3D12CalcSubresource(subResourceInfo.mipLevel, subResourceInfo.arrayLayer, 0, createInfo.mipLevels, arraySize); D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; nativeDevice->GetCopyableFootprints(&nativeResourceDesc, nativeSubResourceIndex, 1, 0, &footprint, nullptr, nullptr, nullptr); diff --git a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp index 9a1b1233..fec7fe8f 100644 --- a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp @@ -137,16 +137,14 @@ namespace RHI::Vulkan { static VkBufferImageCopy GetNativeBufferImageCopy(Device& device, const Texture& texture, const BufferTextureCopyInfo& copyInfo) { - const auto aspectLayout = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT - const auto createInfo = texture.GetCreateInfo(); - - const auto linearRowPitch = GetBytesPerPixel(createInfo.format) * copyInfo.copyRegion.x; - const auto linearSlicePitch = linearRowPitch * copyInfo.copyRegion.y; + const auto footprint = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT VkBufferImageCopy result {}; result.bufferOffset = copyInfo.bufferOffset; - result.bufferRowLength = aspectLayout.rowPitch == linearRowPitch ? 0 : aspectLayout.rowPitch; - result.bufferImageHeight = aspectLayout.slicePitch == linearSlicePitch ? 0 : aspectLayout.slicePitch; + // bufferRowLength/bufferImageHeight are measured in texels and describe how the linear buffer data is strided; + // they mirror the full sub-resource footprint, while imageExtent selects the copied window within it. 
+ result.bufferRowLength = static_cast(footprint.rowPitch / footprint.bytesPerPixel); + result.bufferImageHeight = footprint.extent.y; result.imageOffset = { static_cast(copyInfo.textureOrigin.x), static_cast(copyInfo.textureOrigin.y), static_cast(copyInfo.textureOrigin.z) }; result.imageExtent = { copyInfo.copyRegion.x, copyInfo.copyRegion.y, copyInfo.copyRegion.z }; result.imageSubresource = GetNativeImageSubResourceLayers(copyInfo.textureSubResource); diff --git a/Engine/Source/RHI-Vulkan/Src/Device.cpp b/Engine/Source/RHI-Vulkan/Src/Device.cpp index e9688802..1826cf85 100644 --- a/Engine/Source/RHI-Vulkan/Src/Device.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Device.cpp @@ -170,13 +170,19 @@ namespace RHI::Vulkan { TextureSubResourceCopyFootprint VulkanDevice::GetTextureSubResourceCopyFootprint(const Texture& texture, const TextureSubResourceInfo& subResourceInfo) { const auto& createInfo = texture.GetCreateInfo(); + const auto mipLevel = subResourceInfo.mipLevel; + const auto baseDepth = createInfo.dimension == TextureDimension::t3D ? createInfo.depthOrArraySize : 1; TextureSubResourceCopyFootprint result {}; - result.extent = { createInfo.width, createInfo.height, createInfo.dimension == TextureDimension::t3D ? 
createInfo.depthOrArraySize : 1 }; + result.extent = { + std::max(createInfo.width >> mipLevel, 1u), + std::max(createInfo.height >> mipLevel, 1u), + std::max(baseDepth >> mipLevel, 1u) + }; result.bytesPerPixel = GetBytesPerPixel(createInfo.format); result.rowPitch = result.bytesPerPixel * result.extent.x; result.slicePitch = result.rowPitch * result.extent.y; - result.totalBytes = result.bytesPerPixel * result.extent.x * result.extent.y * result.extent.z; + result.totalBytes = result.slicePitch * result.extent.z; return result; } diff --git a/Engine/Source/Runtime/Src/Asset/Texture.cpp b/Engine/Source/Runtime/Src/Asset/Texture.cpp index d3cd7152..449a2be1 100644 --- a/Engine/Source/Runtime/Src/Asset/Texture.cpp +++ b/Engine/Source/Runtime/Src/Asset/Texture.cpp @@ -184,6 +184,7 @@ namespace Runtime { void Texture::UpdateMips() { const auto arraySize = type == TextureType::t3D ? 1 : depthOrArraySize; + const auto depth = type == TextureType::t3D ? depthOrArraySize : 1; const auto bytesPerPixel = RHI::GetBytesPerPixel(static_cast(format)); subResourcePixelsData.clear(); @@ -192,9 +193,10 @@ namespace Runtime { for (auto m = 0; m < mipLevels; m++) { const auto mipWidth = std::max(width >> m, 1u); const auto mipHeight = std::max(height >> m, 1u); + const auto mipDepth = std::max(depth >> m, 1u); for (auto a = 0; a < arraySize; a++) { - subResourcePixelsData[Internal::GetSubResourceIndex(m, a, arraySize)].resize(mipWidth * mipHeight * bytesPerPixel); + subResourcePixelsData[Internal::GetSubResourceIndex(m, a, arraySize)].resize(mipWidth * mipHeight * mipDepth * bytesPerPixel); } } } @@ -230,8 +232,6 @@ namespace Runtime { texturePtr = texture.Get(), type = type, format = format, - width = width, - height = height, depthOrArraySize = depthOrArraySize, mipLevels = mipLevels, aspect = Internal::GetTextureAspect(format), @@ -239,7 +239,6 @@ namespace Runtime { name = name ]() -> void { const auto arraySize = type == TextureType::t3D ? 
1 : depthOrArraySize; - const auto depth = type == TextureType::t3D ? depthOrArraySize : 1; std::vector copyFootprints; copyFootprints.reserve(mipLevels * arraySize); @@ -261,8 +260,7 @@ namespace Runtime { .SetInitialState(RHI::BufferState::staging) .SetDebugName(std::format("StagingBuffer-{}", name))); - const auto srcRowPitch = width * RHI::GetBytesPerPixel(static_cast(format)); - const auto srcSlicePitch = width * height * RHI::GetBytesPerPixel(static_cast(format)); + const auto bytesPerPixel = RHI::GetBytesPerPixel(static_cast(format)); size_t dstSubResourceOffset = 0; auto* dstData = static_cast(stagingBuffer->Map(RHI::MapMode::write, 0, totalBytes)); @@ -272,8 +270,10 @@ namespace Runtime { const auto& srcPixels = subResourcePixelsData[subResourceIndex]; const auto& dstCopyFootprint = copyFootprints[subResourceIndex]; - for (auto z = 0; z < depthOrArraySize; z++) { - for (auto y = 0; y < height; y++) { + const auto srcRowPitch = dstCopyFootprint.extent.x * bytesPerPixel; + const auto srcSlicePitch = srcRowPitch * dstCopyFootprint.extent.y; + for (auto z = 0u; z < dstCopyFootprint.extent.z; z++) { + for (auto y = 0u; y < dstCopyFootprint.extent.y; y++) { const auto* src = srcPixels.data() + srcSlicePitch * z + srcRowPitch * y; auto* dst = dstData + dstSubResourceOffset + dstCopyFootprint.slicePitch * z + dstCopyFootprint.rowPitch * y; memcpy(dst, src, srcRowPitch); @@ -300,7 +300,7 @@ namespace Runtime { .SetBufferOffset(dstSubResourceOffset) .SetTextureSubResource(RHI::TextureSubResourceInfo(m, a, aspect)) .SetTextureOrigin({ 0, 0, 0 }) - .SetCopyRegion({ width, height, depth })); + .SetCopyRegion(copyFootprints[subResourceIndex].extent)); dstSubResourceOffset += copyFootprints[subResourceIndex].totalBytes; } } From f1bf474845deace48004b2967a4b4500514e75b1 Mon Sep 17 00:00:00 2001 From: FlyAntNotDown <461425614@qq.com> Date: Fri, 26 Jun 2026 14:53:57 +0800 Subject: [PATCH 3/7] feat: implement indirect draw commands for rhi Add 
DrawIndirect/DrawIndexedIndirect/MultiDrawIndirect/MultiDrawIndexedIndirect to RasterPassCommandRecorder across the Vulkan, DirectX12 and Dummy backends, with DrawIndirectArguments/DrawIndexedIndirectArguments describing the indirect buffer layout. Add BufferState::indirect with the matching Vulkan barrier and DX12 resource state mappings. DirectX12 pre-creates the draw and draw-indexed command signatures at device startup. --- .../Include/RHI/DirectX12/CommandRecorder.h | 4 +++ .../Include/RHI/DirectX12/Common.h | 1 + .../Include/RHI/DirectX12/Device.h | 5 +++ .../RHI-DirectX12/Src/CommandRecorder.cpp | 22 ++++++++++++ Engine/Source/RHI-DirectX12/Src/Device.cpp | 34 +++++++++++++++++++ .../Include/RHI/Dummy/CommandRecorder.h | 4 +++ .../Source/RHI-Dummy/Src/CommandRecorder.cpp | 16 +++++++++ .../Include/RHI/Vulkan/CommandRecorder.h | 4 +++ .../Source/RHI-Vulkan/Src/CommandRecorder.cpp | 32 ++++++++++++++--- .../Source/RHI/Include/RHI/CommandRecorder.h | 22 ++++++++++-- Engine/Source/RHI/Include/RHI/Common.h | 1 + 11 files changed, 138 insertions(+), 7 deletions(-) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h index 0b583ebb..5d07a29c 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h @@ -95,6 +95,10 @@ namespace RHI::DirectX12 { void SetPrimitiveTopology(PrimitiveTopology inPrimitiveTopology) override; void SetBlendConstant(const float* inConstants) override; void SetStencilReference(uint32_t inReference) override; + void DrawIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void DrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void MultiDrawIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; + void MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) 
override; void EndPass() override; private: diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h index 295daae1..e6e39994 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h @@ -252,6 +252,7 @@ namespace RHI::DirectX12 { ECIMPL_ITEM(BufferState::shaderReadOnly, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) ECIMPL_ITEM(BufferState::storage, D3D12_RESOURCE_STATE_COMMON) ECIMPL_ITEM(BufferState::rwStorage, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + ECIMPL_ITEM(BufferState::indirect, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT) ECIMPL_END(D3D12_RESOURCE_STATES) ECIMPL_BEGIN(TextureDimension, D3D12_RESOURCE_DIMENSION) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h index 3bb94b96..35692a15 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h @@ -95,6 +95,8 @@ namespace RHI::DirectX12 { TextureSubResourceCopyFootprint GetTextureSubResourceCopyFootprint(const Texture& texture, const TextureSubResourceInfo& subResourceInfo) override; ID3D12Device* GetNative() const; + ID3D12CommandSignature* GetDrawIndirectCommandSignature() const; + ID3D12CommandSignature* GetDrawIndexedIndirectCommandSignature() const; Common::UniquePtr AllocateRtvDescriptor() const; Common::UniquePtr AllocateCbvSrvUavDescriptor() const; Common::UniquePtr AllocateSamplerDescriptor() const; @@ -105,6 +107,7 @@ namespace RHI::DirectX12 { void CreateNativeQueues(const DeviceCreateInfo& inCreateInfo); void QueryNativeDescriptorSize(); void CreateDescriptorPools(); + void CreateDrawIndirectCommandSignatures(); #if BUILD_CONFIG_DEBUG void RegisterNativeDebugLayerExceptionHandler(); void UnregisterNativeDebugLayerExceptionHandler(); @@ -122,5 
+125,7 @@ namespace RHI::DirectX12 { Common::UniquePtr samplerDescriptorPool; Common::UniquePtr dsvDescriptorPool; ComPtr nativeDevice; + ComPtr drawIndirectCommandSignature; + ComPtr drawIndexedIndirectCommandSignature; }; } diff --git a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp index 7faf502b..a7e88095 100644 --- a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp @@ -331,6 +331,28 @@ namespace RHI::DirectX12 { commandBuffer.GetNativeCmdList()->OMSetStencilRef(inReference); } + void DX12RasterPassCommandRecorder::DrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndirect(inIndirectBuffer, inOffset, 1); + } + + void DX12RasterPassCommandRecorder::DrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndexedIndirect(inIndirectBuffer, inOffset, 1); + } + + void DX12RasterPassCommandRecorder::MultiDrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + commandBuffer.GetNativeCmdList()->ExecuteIndirect(device.GetDrawIndirectCommandSignature(), inDrawCount, indirectBuffer->GetNative(), inOffset, nullptr, 0); + } + + void DX12RasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + commandBuffer.GetNativeCmdList()->ExecuteIndirect(device.GetDrawIndexedIndirectCommandSignature(), inDrawCount, indirectBuffer->GetNative(), inOffset, nullptr, 0); + } + void DX12RasterPassCommandRecorder::EndPass() { } diff --git a/Engine/Source/RHI-DirectX12/Src/Device.cpp b/Engine/Source/RHI-DirectX12/Src/Device.cpp index 7d6e4478..eb07ff72 100644 --- a/Engine/Source/RHI-DirectX12/Src/Device.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Device.cpp @@ -24,6 +24,7 @@ #include #include 
#include +#include #include namespace RHI::DirectX12 { @@ -152,6 +153,7 @@ namespace RHI::DirectX12 { CreateNativeQueues(inCreateInfo); QueryNativeDescriptorSize(); CreateDescriptorPools(); + CreateDrawIndirectCommandSignatures(); #if BUILD_CONFIG_DEBUG RegisterNativeDebugLayerExceptionHandler(); #endif @@ -287,6 +289,16 @@ namespace RHI::DirectX12 { return nativeDevice.Get(); } + ID3D12CommandSignature* DX12Device::GetDrawIndirectCommandSignature() const + { + return drawIndirectCommandSignature.Get(); + } + + ID3D12CommandSignature* DX12Device::GetDrawIndexedIndirectCommandSignature() const + { + return drawIndexedIndirectCommandSignature.Get(); + } + Common::UniquePtr DX12Device::AllocateRtvDescriptor() const { return rtvDescriptorPool->Allocate(); @@ -356,6 +368,28 @@ namespace RHI::DirectX12 { dsvDescriptorPool = Common::MakeUnique(*this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, nativeDsvDescriptorSize, 16); } + void DX12Device::CreateDrawIndirectCommandSignatures() + { + const auto createSignature = [this](const D3D12_INDIRECT_ARGUMENT_TYPE inArgumentType, const uint32_t inStride) -> ComPtr { + D3D12_INDIRECT_ARGUMENT_DESC argumentDesc {}; + argumentDesc.Type = inArgumentType; + + D3D12_COMMAND_SIGNATURE_DESC commandSignatureDesc {}; + commandSignatureDesc.ByteStride = inStride; + commandSignatureDesc.NumArgumentDescs = 1; + commandSignatureDesc.pArgumentDescs = &argumentDesc; + commandSignatureDesc.NodeMask = 0; + + // A command signature carrying only a draw argument changes no root parameters, so no root signature is required. 
+ ComPtr commandSignature; + Assert(SUCCEEDED(nativeDevice->CreateCommandSignature(&commandSignatureDesc, nullptr, IID_PPV_ARGS(&commandSignature)))); + return commandSignature; + }; + + drawIndirectCommandSignature = createSignature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, sizeof(DrawIndirectArguments)); + drawIndexedIndirectCommandSignature = createSignature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, sizeof(DrawIndexedIndirectArguments)); + } + #if BUILD_CONFIG_DEBUG void DX12Device::RegisterNativeDebugLayerExceptionHandler() { diff --git a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h index c45d5289..5db65b8d 100644 --- a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h +++ b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h @@ -79,6 +79,10 @@ namespace RHI::Dummy { void SetPrimitiveTopology(PrimitiveTopology primitiveTopology) override; void SetBlendConstant(const float*/*[4]*/ constants) override; void SetStencilReference(uint32_t reference) override; + void DrawIndirect(Buffer* indirectBuffer, size_t offset) override; + void DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) override; + void MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) override; + void MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) override; void EndPass() override; }; } diff --git a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp index f981c7c2..09bf36ae 100644 --- a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp @@ -117,6 +117,22 @@ namespace RHI::Dummy { { } + void DummyRasterPassCommandRecorder::DrawIndirect(Buffer* indirectBuffer, size_t offset) + { + } + + void DummyRasterPassCommandRecorder::DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) + { + } + + void 
DummyRasterPassCommandRecorder::MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) + { + } + + void DummyRasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) + { + } + void DummyRasterPassCommandRecorder::EndPass() { } diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h index cdad3f56..6cb31e96 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h @@ -97,6 +97,10 @@ namespace RHI::Vulkan { void SetPrimitiveTopology(PrimitiveTopology inPrimitiveTopology) override; void SetBlendConstant(const float* inConstants) override; void SetStencilReference(uint32_t inReference) override; + void DrawIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void DrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void MultiDrawIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; + void MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; void EndPass() override; private: diff --git a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp index fec7fe8f..fa0d7857 100644 --- a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp @@ -27,7 +27,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, VK_ACCESS_TRANSFER_WRITE_BIT }, { BufferState::shaderReadOnly, VK_ACCESS_SHADER_READ_BIT }, { BufferState::storage, VK_ACCESS_SHADER_READ_BIT }, - { BufferState::rwStorage, VK_ACCESS_SHADER_WRITE_BIT } + { BufferState::rwStorage, VK_ACCESS_SHADER_WRITE_BIT }, + { BufferState::indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT } }; return map.at(inState); } @@ -41,8 +42,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, 
VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } - + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::indirect, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT } }; return map.at(inState); } @@ -56,7 +57,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::indirect, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT } }; return map.at(inState); } @@ -494,6 +496,28 @@ namespace RHI::Vulkan { vkCmdSetStencilReference(commandBuffer.GetNative(), VK_STENCIL_FACE_FRONT_AND_BACK, inReference); } + void VulkanRasterPassCommandRecorder::DrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndirect(inIndirectBuffer, inOffset, 1); + } + + void VulkanRasterPassCommandRecorder::DrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndexedIndirect(inIndirectBuffer, inOffset, 1); + } + + void VulkanRasterPassCommandRecorder::MultiDrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + vkCmdDrawIndirect(commandBuffer.GetNative(), indirectBuffer->GetNative(), inOffset, inDrawCount, sizeof(DrawIndirectArguments)); + } + + void 
VulkanRasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + vkCmdDrawIndexedIndirect(commandBuffer.GetNative(), indirectBuffer->GetNative(), inOffset, inDrawCount, sizeof(DrawIndexedIndirectArguments)); + } + void VulkanRasterPassCommandRecorder::EndPass() { auto* pfn = device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdEndRenderingKHR"); diff --git a/Engine/Source/RHI/Include/RHI/CommandRecorder.h b/Engine/Source/RHI/Include/RHI/CommandRecorder.h index ac747439..fbeea7e4 100644 --- a/Engine/Source/RHI/Include/RHI/CommandRecorder.h +++ b/Engine/Source/RHI/Include/RHI/CommandRecorder.h @@ -92,6 +92,21 @@ namespace RHI { BufferTextureCopyInfo& SetCopyRegion(const Common::UVec3& inCopyRegion); }; + struct DrawIndirectArguments { + uint32_t vertexCount = 0; + uint32_t instanceCount = 0; + uint32_t firstVertex = 0; + uint32_t firstInstance = 0; + }; + + struct DrawIndexedIndirectArguments { + uint32_t indexCount = 0; + uint32_t instanceCount = 0; + uint32_t firstIndex = 0; + int32_t baseVertex = 0; + uint32_t firstInstance = 0; + }; + template struct ColorAttachmentBase { LoadOp loadOp; @@ -232,9 +247,10 @@ namespace RHI { virtual void SetPrimitiveTopology(PrimitiveTopology primitiveTopology) = 0; virtual void SetBlendConstant(const float*/*[4]*/ constants) = 0; virtual void SetStencilReference(uint32_t reference) = 0; - // TODO DrawIndirect(...) - // TODO DrawIndexedIndirect(...) - // TODO MultiIndirectDraw(...) 
+ virtual void DrawIndirect(Buffer* indirectBuffer, size_t offset) = 0; + virtual void DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) = 0; + virtual void MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) = 0; + virtual void MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) = 0; virtual void EndPass() = 0; protected: diff --git a/Engine/Source/RHI/Include/RHI/Common.h b/Engine/Source/RHI/Include/RHI/Common.h index 594174c0..f5ba3853 100644 --- a/Engine/Source/RHI/Include/RHI/Common.h +++ b/Engine/Source/RHI/Include/RHI/Common.h @@ -392,6 +392,7 @@ namespace RHI { shaderReadOnly, storage, rwStorage, + indirect, max }; From 6b4e1bdd5f37f69025a951600a30e42a05963819 Mon Sep 17 00:00:00 2001 From: kindem Date: Sat, 27 Jun 2026 12:52:12 +0800 Subject: [PATCH 4/7] feat: add debug markers to rhi command recorder --- .../Include/RHI/DirectX12/CommandRecorder.h | 8 +++ .../RHI-DirectX12/Src/CommandRecorder.cpp | 50 +++++++++++++++++++ .../Include/RHI/Dummy/CommandRecorder.h | 8 +++ .../Source/RHI-Dummy/Src/CommandRecorder.cpp | 32 ++++++++++++ .../Include/RHI/Vulkan/CommandRecorder.h | 8 +++ .../Source/RHI-Vulkan/Src/CommandRecorder.cpp | 50 +++++++++++++++++++ .../Source/RHI/Include/RHI/CommandRecorder.h | 22 ++++++++ Engine/Source/RHI/Src/CommandRecorder.cpp | 11 ++++ Engine/Source/Render/Src/RenderGraph.cpp | 3 ++ 9 files changed, 192 insertions(+) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h index 5d07a29c..ed512d16 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h @@ -20,6 +20,8 @@ namespace RHI::DirectX12 { ~DX12CommandRecorder() override; void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; 
Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& inBeginInfo) override; @@ -38,6 +40,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -60,6 +64,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* inPipeline) override; @@ -82,6 +88,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* inPipeline) override; diff --git a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp index a7e88095..9dd0c129 100644 --- a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp @@ -73,6 +73,12 @@ namespace RHI::DirectX12 { } } +namespace RHI::DirectX12::Internal { + // PIX_EVENT_ANSI_VERSION: marks the BeginEvent payload as a plain ANSI string, decoded by RenderDoc/PIX without the + // WinPixEventRuntime dependency. 
+ constexpr UINT pixEventAnsiVersion = 1; +} + namespace RHI::DirectX12 { DX12CopyPassCommandRecorder::DX12CopyPassCommandRecorder(DX12Device& inDevice, DX12CommandRecorder& inCmdRecorder, DX12CommandBuffer& inCmdBuffer) : device(inDevice) @@ -88,6 +94,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12CopyPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12CopyPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12CopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { const auto* srcBuffer = static_cast(src); @@ -170,6 +186,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12ComputePassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12ComputePassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12ComputePassCommandRecorder::SetPipeline(ComputePipeline* inPipeline) { computePipeline = static_cast(inPipeline); @@ -253,6 +279,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12RasterPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12RasterPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12RasterPassCommandRecorder::SetPipeline(RasterPipeline* inPipeline) { rasterPipeline = static_cast(inPipeline); @@ -407,6 +443,20 @@ namespace RHI::DirectX12 { commandBuffer.GetNativeCmdList()->ResourceBarrier(1, &resourceBarrier); } + void DX12CommandRecorder::BeginMarker(const std::string& inLabel) + { +#if BUILD_CONFIG_DEBUG + commandBuffer.GetNativeCmdList()->BeginEvent(Internal::pixEventAnsiVersion, inLabel.c_str(), static_cast(inLabel.size() + 1)); +#endif + } + + void DX12CommandRecorder::EndMarker() + { +#if 
BUILD_CONFIG_DEBUG + commandBuffer.GetNativeCmdList()->EndEvent(); +#endif + } + Common::UniquePtr DX12CommandRecorder::BeginCopyPass() { return Common::UniquePtr(new DX12CopyPassCommandRecorder(device, *this, commandBuffer)); diff --git a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h index 5db65b8d..0d6b007e 100644 --- a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h +++ b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h @@ -16,6 +16,8 @@ namespace RHI::Dummy { ~DummyCommandRecorder() override; void ResourceBarrier(const Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& beginInfo) override; @@ -33,6 +35,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -50,6 +54,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* pipeline) override; @@ -66,6 +72,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* pipeline) override; diff --git a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp index 09bf36ae..4d891b08 100644 --- 
a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp @@ -17,6 +17,14 @@ namespace RHI::Dummy { { } + void DummyCopyPassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyCopyPassCommandRecorder::EndMarker() + { + } + void DummyCopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { } @@ -47,6 +55,14 @@ namespace RHI::Dummy { { } + void DummyComputePassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyComputePassCommandRecorder::EndMarker() + { + } + void DummyComputePassCommandRecorder::SetPipeline(ComputePipeline* pipeline) { } @@ -73,6 +89,14 @@ namespace RHI::Dummy { { } + void DummyRasterPassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyRasterPassCommandRecorder::EndMarker() + { + } + void DummyRasterPassCommandRecorder::SetPipeline(RasterPipeline* pipeline) { } @@ -148,6 +172,14 @@ namespace RHI::Dummy { { } + void DummyCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyCommandRecorder::EndMarker() + { + } + Common::UniquePtr DummyCommandRecorder::BeginCopyPass() { return Common::UniquePtr(new DummyCopyPassCommandRecorder(dummyCommandBuffer)); diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h index 6cb31e96..8a297530 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h @@ -22,6 +22,8 @@ namespace RHI::Vulkan { ~VulkanCommandRecorder() override; void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& inBeginInfo) override; @@ -40,6 +42,8 @@ 
namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -62,6 +66,8 @@ namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* inPipeline) override; @@ -84,6 +90,8 @@ namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* inPipeline) override; diff --git a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp index fa0d7857..dfb093ba 100644 --- a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp @@ -209,6 +209,26 @@ namespace RHI::Vulkan { } } + void VulkanCommandRecorder::BeginMarker(const std::string& inLabel) + { +#if BUILD_CONFIG_DEBUG + VkDebugUtilsLabelEXT labelInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT }; + labelInfo.pLabelName = inLabel.c_str(); + labelInfo.color[0] = labelInfo.color[1] = labelInfo.color[2] = labelInfo.color[3] = 1.0f; + + auto* pfn = device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdBeginDebugUtilsLabelEXT"); + pfn(commandBuffer.GetNative(), &labelInfo); +#endif + } + + void VulkanCommandRecorder::EndMarker() + { +#if BUILD_CONFIG_DEBUG + auto* pfn = device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdEndDebugUtilsLabelEXT"); + pfn(commandBuffer.GetNative()); +#endif + } + Common::UniquePtr VulkanCommandRecorder::BeginCopyPass() { return 
Common::UniquePtr(new VulkanCopyPassCommandRecorder(device, *this, commandBuffer)); @@ -243,6 +263,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanCopyPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanCopyPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanCopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { const auto* srcBuffer = static_cast(src); @@ -308,6 +338,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanComputePassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanComputePassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanComputePassCommandRecorder::SetPipeline(ComputePipeline* inPipeline) { computePipeline = static_cast(inPipeline); @@ -408,6 +448,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanRasterPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanRasterPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanRasterPassCommandRecorder::SetPipeline(RasterPipeline* inPipeline) { rasterPipeline = static_cast(inPipeline); diff --git a/Engine/Source/RHI/Include/RHI/CommandRecorder.h b/Engine/Source/RHI/Include/RHI/CommandRecorder.h index fbeea7e4..3b9a4a28 100644 --- a/Engine/Source/RHI/Include/RHI/CommandRecorder.h +++ b/Engine/Source/RHI/Include/RHI/CommandRecorder.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -199,6 +200,8 @@ namespace RHI { public: virtual ~CommonCommandRecorder(); virtual void ResourceBarrier(const Barrier& barrier) = 0; + virtual void BeginMarker(const std::string& label) = 0; + virtual void EndMarker() = 0; }; class CopyPassCommandRecorder 
: public CommonCommandRecorder { @@ -270,8 +273,27 @@ namespace RHI { protected: CommandRecorder(); }; + + class ScopedMarker { + public: + NonCopyable(ScopedMarker) + ScopedMarker(CommonCommandRecorder& inRecorder, const std::string& inLabel); + ~ScopedMarker(); + + private: + CommonCommandRecorder& recorder; + }; } +#define RHI_MARKER_CONCAT_IMPL(a, b) a##b +#define RHI_MARKER_CONCAT(a, b) RHI_MARKER_CONCAT_IMPL(a, b) +#if BUILD_CONFIG_DEBUG +#define RHI_SCOPED_MARKER(recorder, label) \ + RHI::ScopedMarker RHI_MARKER_CONCAT(rhiScopedMarker_, __COUNTER__) { recorder, label } +#else +#define RHI_SCOPED_MARKER(recorder, label) ((void) 0) +#endif + namespace RHI { template ColorAttachmentBase::ColorAttachmentBase( diff --git a/Engine/Source/RHI/Src/CommandRecorder.cpp b/Engine/Source/RHI/Src/CommandRecorder.cpp index d553f51b..c3f67420 100644 --- a/Engine/Source/RHI/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI/Src/CommandRecorder.cpp @@ -207,4 +207,15 @@ namespace RHI { RasterPassCommandRecorder::RasterPassCommandRecorder() = default; RasterPassCommandRecorder::~RasterPassCommandRecorder() = default; + + ScopedMarker::ScopedMarker(CommonCommandRecorder& inRecorder, const std::string& inLabel) + : recorder(inRecorder) + { + recorder.BeginMarker(inLabel); + } + + ScopedMarker::~ScopedMarker() + { + recorder.EndMarker(); + } } diff --git a/Engine/Source/Render/Src/RenderGraph.cpp b/Engine/Source/Render/Src/RenderGraph.cpp index c119bac8..247777cf 100644 --- a/Engine/Source/Render/Src/RenderGraph.cpp +++ b/Engine/Source/Render/Src/RenderGraph.cpp @@ -783,6 +783,7 @@ namespace Render { void RGBuilder::ExecuteCopyPass(RHI::CommandRecorder& inRecoder, RGCopyPass* inCopyPass) { + RHI_SCOPED_MARKER(inRecoder, inCopyPass->name); DevirtualizeResources(passWritesMap.at(inCopyPass)); { TransitionResourcesForCopyPassDesc(inRecoder, inCopyPass->passDesc); @@ -803,6 +804,7 @@ namespace Render { void RGBuilder::ExecuteComputePass(RHI::CommandRecorder& inRecoder, RGComputePass* 
inComputePass) { + RHI_SCOPED_MARKER(inRecoder, inComputePass->name); DevirtualizeResources(passWritesMap.at(inComputePass)); DevirtualizeBindGroupsAndViews(inComputePass->bindGroups); { @@ -825,6 +827,7 @@ namespace Render { void RGBuilder::ExecuteRasterPass(RHI::CommandRecorder& inRecoder, RGRasterPass* inRasterPass) { + RHI_SCOPED_MARKER(inRecoder, inRasterPass->name); DevirtualizeResources(passWritesMap.at(inRasterPass)); DevirtualizeAttachmentViews(inRasterPass->passDesc); DevirtualizeBindGroupsAndViews(inRasterPass->bindGroups); From 4cb0d098df2f50b80322a0bc25704135be8ba54f Mon Sep 17 00:00:00 2001 From: kindem Date: Sat, 27 Jun 2026 13:29:53 +0800 Subject: [PATCH 5/7] feat: add gpu features and limits query to rhi --- .../RHI-DirectX12/Include/RHI/DirectX12/Gpu.h | 2 + Engine/Source/RHI-DirectX12/Src/Gpu.cpp | 39 ++++++++++++ .../Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h | 2 + Engine/Source/RHI-Dummy/Src/Gpu.cpp | 10 ++++ .../RHI-Vulkan/Include/RHI/Vulkan/Gpu.h | 2 + Engine/Source/RHI-Vulkan/Src/Gpu.cpp | 59 +++++++++++++++++++ Engine/Source/RHI/Include/RHI/Common.h | 11 ++++ Engine/Source/RHI/Include/RHI/Gpu.h | 25 ++++++++ 8 files changed, 150 insertions(+) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h index 7a9a911a..2e875889 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h @@ -20,6 +20,8 @@ namespace RHI::DirectX12 { ~DX12Gpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& inCreateInfo) override; DX12Instance& GetInstance() const override; diff --git a/Engine/Source/RHI-DirectX12/Src/Gpu.cpp b/Engine/Source/RHI-DirectX12/Src/Gpu.cpp index f80bed93..086d2cb6 100644 --- a/Engine/Source/RHI-DirectX12/Src/Gpu.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Gpu.cpp @@ -27,6 
+27,45 @@ namespace RHI::DirectX12 { return property; } + FeatureFlags DX12Gpu::GetFeatures() + { + return FeatureBits::samplerAnisotropy + | FeatureBits::textureCompressionBc + | FeatureBits::timestampQuery + | FeatureBits::multiDrawIndirect + | FeatureBits::drawIndirectFirstInstance; + } + + GpuLimits DX12Gpu::GetLimits() + { + // D3D12 has no bind-group concept and its resource-size cap is tier-dependent, so both use conservative values. + constexpr uint32_t maxBindGroups = 8; + constexpr uint64_t maxResourceSize = 1ull << 31; + + GpuLimits result {}; + result.maxTextureDimension1D = D3D12_REQ_TEXTURE1D_U_DIMENSION; + result.maxTextureDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; + result.maxTextureDimension3D = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + result.maxTextureArrayLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; + result.maxBindGroups = maxBindGroups; + result.maxVertexBuffers = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; + result.maxVertexAttributes = D3D12_VS_INPUT_REGISTER_COUNT; + result.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; + result.maxUniformBufferBindingSize = static_cast(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT) * 16; + result.maxStorageBufferBindingSize = maxResourceSize; + result.maxBufferSize = maxResourceSize; + result.minUniformBufferOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; + result.minStorageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; + result.optimalBufferCopyOffsetAlignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + result.optimalBufferCopyRowPitchAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + result.maxComputeWorkgroupSizeX = D3D12_CS_THREAD_GROUP_MAX_X; + result.maxComputeWorkgroupSizeY = D3D12_CS_THREAD_GROUP_MAX_Y; + result.maxComputeWorkgroupSizeZ = D3D12_CS_THREAD_GROUP_MAX_Z; + result.maxComputeInvocationsPerWorkgroup = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + result.maxComputeWorkgroupsPerDimension = D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; 
+ return result; + } + DX12Instance& DX12Gpu::GetInstance() const { return instance; diff --git a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h index 01912270..08b72d74 100644 --- a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h +++ b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h @@ -14,6 +14,8 @@ namespace RHI::Dummy { explicit DummyGpu(DummyInstance& inInstance); ~DummyGpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& createInfo) override; DummyInstance& GetInstance() const override; diff --git a/Engine/Source/RHI-Dummy/Src/Gpu.cpp b/Engine/Source/RHI-Dummy/Src/Gpu.cpp index 5409865f..d02abc00 100644 --- a/Engine/Source/RHI-Dummy/Src/Gpu.cpp +++ b/Engine/Source/RHI-Dummy/Src/Gpu.cpp @@ -18,6 +18,16 @@ namespace RHI::Dummy { return {}; } + FeatureFlags DummyGpu::GetFeatures() + { + return FeatureFlags::null; + } + + GpuLimits DummyGpu::GetLimits() + { + return {}; + } + Common::UniquePtr DummyGpu::RequestDevice(const DeviceCreateInfo& createInfo) { return { new DummyDevice(*this, createInfo) }; diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h index a2c62f3b..ff34c927 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h @@ -17,6 +17,8 @@ namespace RHI::Vulkan { ~VulkanGpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& inCreateInfo) override; VulkanInstance& GetInstance() const override; diff --git a/Engine/Source/RHI-Vulkan/Src/Gpu.cpp b/Engine/Source/RHI-Vulkan/Src/Gpu.cpp index d155bb69..06bf4db0 100644 --- a/Engine/Source/RHI-Vulkan/Src/Gpu.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Gpu.cpp @@ -2,6 +2,8 @@ // Created by johnk 
on 12/1/2022. // +#include + #include #include #include @@ -27,6 +29,63 @@ namespace RHI::Vulkan { return property; } + FeatureFlags VulkanGpu::GetFeatures() + { + VkPhysicalDeviceFeatures features; + vkGetPhysicalDeviceFeatures(nativePhysicalDevice, &features); + + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(nativePhysicalDevice, &properties); + + FeatureFlags result = FeatureFlags::null; + if (features.samplerAnisotropy) { result = result | FeatureBits::samplerAnisotropy; } + if (features.textureCompressionBC) { result = result | FeatureBits::textureCompressionBc; } + if (properties.limits.timestampComputeAndGraphics) { result = result | FeatureBits::timestampQuery; } + if (features.multiDrawIndirect) { result = result | FeatureBits::multiDrawIndirect; } + if (features.drawIndirectFirstInstance) { result = result | FeatureBits::drawIndirectFirstInstance; } + return result; + } + + GpuLimits VulkanGpu::GetLimits() + { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(nativePhysicalDevice, &properties); + const auto& nativeLimits = properties.limits; + + // Vulkan 1.0 has no direct max-buffer-size query, so the largest device-local heap is used as an approximation. 
+ VkPhysicalDeviceMemoryProperties memoryProperties; + vkGetPhysicalDeviceMemoryProperties(nativePhysicalDevice, &memoryProperties); + VkDeviceSize maxDeviceLocalHeapSize = 0; + for (uint32_t i = 0; i < memoryProperties.memoryHeapCount; i++) { + if ((memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { + maxDeviceLocalHeapSize = std::max(maxDeviceLocalHeapSize, memoryProperties.memoryHeaps[i].size); + } + } + + GpuLimits result {}; + result.maxTextureDimension1D = nativeLimits.maxImageDimension1D; + result.maxTextureDimension2D = nativeLimits.maxImageDimension2D; + result.maxTextureDimension3D = nativeLimits.maxImageDimension3D; + result.maxTextureArrayLayers = nativeLimits.maxImageArrayLayers; + result.maxBindGroups = nativeLimits.maxBoundDescriptorSets; + result.maxVertexBuffers = nativeLimits.maxVertexInputBindings; + result.maxVertexAttributes = nativeLimits.maxVertexInputAttributes; + result.maxColorAttachments = nativeLimits.maxColorAttachments; + result.maxUniformBufferBindingSize = nativeLimits.maxUniformBufferRange; + result.maxStorageBufferBindingSize = nativeLimits.maxStorageBufferRange; + result.maxBufferSize = maxDeviceLocalHeapSize; + result.minUniformBufferOffsetAlignment = static_cast(nativeLimits.minUniformBufferOffsetAlignment); + result.minStorageBufferOffsetAlignment = static_cast(nativeLimits.minStorageBufferOffsetAlignment); + result.optimalBufferCopyOffsetAlignment = static_cast(nativeLimits.optimalBufferCopyOffsetAlignment); + result.optimalBufferCopyRowPitchAlignment = static_cast(nativeLimits.optimalBufferCopyRowPitchAlignment); + result.maxComputeWorkgroupSizeX = nativeLimits.maxComputeWorkGroupSize[0]; + result.maxComputeWorkgroupSizeY = nativeLimits.maxComputeWorkGroupSize[1]; + result.maxComputeWorkgroupSizeZ = nativeLimits.maxComputeWorkGroupSize[2]; + result.maxComputeInvocationsPerWorkgroup = nativeLimits.maxComputeWorkGroupInvocations; + result.maxComputeWorkgroupsPerDimension = 
nativeLimits.maxComputeWorkGroupCount[0]; + return result; + } + Common::UniquePtr VulkanGpu::RequestDevice(const DeviceCreateInfo& inCreateInfo) { return { new VulkanDevice(*this, inCreateInfo) }; diff --git a/Engine/Source/RHI/Include/RHI/Common.h b/Engine/Source/RHI/Include/RHI/Common.h index f5ba3853..ea4c46cf 100644 --- a/Engine/Source/RHI/Include/RHI/Common.h +++ b/Engine/Source/RHI/Include/RHI/Common.h @@ -463,6 +463,17 @@ namespace RHI { }; using ColorWriteFlags = Common::Flags; DECLARE_FLAG_BITS_OP(ColorWriteFlags, ColorWriteBits) + + enum class FeatureBits : uint32_t { + samplerAnisotropy = 0x1, + textureCompressionBc = 0x2, + timestampQuery = 0x4, + multiDrawIndirect = 0x8, + drawIndirectFirstInstance = 0x10, + max = 0x20 + }; + using FeatureFlags = Common::Flags; + DECLARE_FLAG_BITS_OP(FeatureFlags, FeatureBits) } namespace RHI { diff --git a/Engine/Source/RHI/Include/RHI/Gpu.h b/Engine/Source/RHI/Include/RHI/Gpu.h index decf5b7d..981e8655 100644 --- a/Engine/Source/RHI/Include/RHI/Gpu.h +++ b/Engine/Source/RHI/Include/RHI/Gpu.h @@ -20,11 +20,36 @@ namespace RHI { GpuType type; }; + struct GpuLimits { + uint32_t maxTextureDimension1D; + uint32_t maxTextureDimension2D; + uint32_t maxTextureDimension3D; + uint32_t maxTextureArrayLayers; + uint32_t maxBindGroups; + uint32_t maxVertexBuffers; + uint32_t maxVertexAttributes; + uint32_t maxColorAttachments; + uint64_t maxUniformBufferBindingSize; + uint64_t maxStorageBufferBindingSize; + uint64_t maxBufferSize; + uint32_t minUniformBufferOffsetAlignment; + uint32_t minStorageBufferOffsetAlignment; + uint32_t optimalBufferCopyOffsetAlignment; + uint32_t optimalBufferCopyRowPitchAlignment; + uint32_t maxComputeWorkgroupSizeX; + uint32_t maxComputeWorkgroupSizeY; + uint32_t maxComputeWorkgroupSizeZ; + uint32_t maxComputeInvocationsPerWorkgroup; + uint32_t maxComputeWorkgroupsPerDimension; + }; + class Gpu { public: NonCopyable(Gpu) virtual ~Gpu(); virtual GpuProperty GetProperty() = 0; + virtual FeatureFlags 
GetFeatures() = 0; + virtual GpuLimits GetLimits() = 0; virtual Common::UniquePtr RequestDevice(const DeviceCreateInfo& createInfo) = 0; virtual Instance& GetInstance() const = 0; From 314590051ddc59c6b67fb9a1810ac60e3fdf35a2 Mon Sep 17 00:00:00 2001 From: kindem Date: Sat, 27 Jun 2026 20:21:19 +0800 Subject: [PATCH 6/7] fix: own each dist copy once to make runtime copies generator-agnostic Runtime dependencies and resources all land in one shared per-sub-project Binaries directory, and previously every consumer re-copied an overlapping set of files there, so parallel builds raced on the same destination files (intermittent "Error copying file" failures, hit most often on macOS). Now each runtime file is copied by exactly one owner. Files referenced through a target ($<TARGET_FILE:...>) get a deduplicated per-file owner created in the consumer scope where the target is visible (imported third-party targets such as Qt are directory-scoped); first-party owners wait on their producing target, and they cannot share a single owner or a build tool such as MirrorTool would close a dependency cycle. Plain-path third-party files and resources are batched into one per-sub-project assets target whose copies run sequentially. --- CMake/Target.cmake | 149 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 123 insertions(+), 26 deletions(-) diff --git a/CMake/Target.cmake b/CMake/Target.cmake index a123096d..fc6cfe5f 100644 --- a/CMake/Target.cmake +++ b/CMake/Target.cmake @@ -86,6 +86,95 @@ function(exp_gather_target_runtime_dependencies_recurse) set(${arg_OUT_DEP_TARGET} ${result_dep_target} PARENT_SCOPE) endfunction() +# Every runtime file lands in the single shared per-sub-project Binaries directory.
To keep the copies generator-agnostic +# and race-free, each destination is owned by exactly one copy step rather than being re-copied by every consumer: +# - files referenced through a target ($<TARGET_FILE:...>) get a per-file owner created in the consumer's scope, so the +# generator expression resolves where the target is visible (imported third-party targets such as Qt are +# directory-scoped and would be invisible in a global aggregate target). A first-party owner additionally waits on its +# producing target; merging these into one target is impossible because a build tool such as MirrorTool consumes a dll +# while another dll's producer transitively depends on the tool, which would close a build cycle; +# - prebuilt third-party files given as plain paths, plus resources, carry no target and are batched (deduplicated) into +# one per-sub-project assets target whose copies run sequentially (see exp_finalize_dist_assets). +function(exp_add_runtime_dep_copy) + set(options "") + set(singleValueArgs KEY SRC PRODUCER OUTPUT_TARGET) + set(multiValueArgs "") + cmake_parse_arguments(arg "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) + + string(MAKE_C_IDENTIFIER "${arg_KEY}" key_id) + set(registry_property EXP_RUNTIME_DEP_COPY_${SUB_PROJECT_NAME}_${key_id}) + + get_property(copy_target GLOBAL PROPERTY ${registry_property}) + if (NOT copy_target) + exp_get_runtime_output_dir(OUTPUT out_dir) + set(copy_target ${SUB_PROJECT_NAME}.CopyDll.${key_id}) + add_custom_target( + ${copy_target} + COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${arg_SRC} ${out_dir} + ) + set_target_properties(${copy_target} PROPERTIES FOLDER ${AUX_TARGETS_FOLDER}) + if (arg_PRODUCER) + add_dependencies(${copy_target} ${arg_PRODUCER}) + endif () + set_property(GLOBAL PROPERTY ${registry_property} ${copy_target}) + endif () + + set(${arg_OUTPUT_TARGET} ${copy_target} PARENT_SCOPE) +endfunction() + +function(exp_schedule_dist_assets_finalize) +
get_property(scheduled GLOBAL PROPERTY EXP_DIST_ASSETS_SCHEDULED_${SUB_PROJECT_NAME}) + if (NOT scheduled) + set_property(GLOBAL PROPERTY EXP_DIST_ASSETS_SCHEDULED_${SUB_PROJECT_NAME} TRUE) + cmake_language(DEFER DIRECTORY ${CMAKE_SOURCE_DIR} CALL exp_finalize_dist_assets "${SUB_PROJECT_NAME}") + endif () +endfunction() + +function(exp_finalize_dist_assets sub_project) + get_property(asset_files GLOBAL PROPERTY EXP_DIST_ASSET_FILES_${sub_project}) + get_property(asset_resources GLOBAL PROPERTY EXP_DIST_ASSET_RESOURCES_${sub_project}) + get_property(consumers GLOBAL PROPERTY EXP_DIST_ASSET_CONSUMERS_${sub_project}) + + if (NOT asset_files AND NOT asset_resources) + return() + endif () + + if (with_multi_config_generator) + set(out_dir ${CMAKE_BINARY_DIR}/Dist/$/${sub_project}/Binaries) + else () + set(out_dir ${CMAKE_BINARY_DIR}/Dist/${sub_project}/Binaries) + endif () + + set(copy_commands COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}) + if (asset_files) + list(REMOVE_DUPLICATES asset_files) + foreach (f ${asset_files}) + list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${f} ${out_dir}) + endforeach () + endif () + if (asset_resources) + list(REMOVE_DUPLICATES asset_resources) + foreach (entry ${asset_resources}) + string(REPLACE "->" ";" pair "${entry}") + list(GET pair 0 src) + list(GET pair 1 dst) + list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${out_dir}/${dst}) + endforeach () + endif () + + set(copy_target ${sub_project}.CopyDistAssets) + add_custom_target(${copy_target} ${copy_commands}) + set_target_properties(${copy_target} PROPERTIES FOLDER ${sub_project}/Aux) + + if (consumers) + list(REMOVE_DUPLICATES consumers) + foreach (consumer ${consumers}) + add_dependencies(${consumer} ${copy_target}) + endforeach () + endif () +endfunction() + function(exp_process_runtime_dependencies) set(options NOT_INSTALL) set(singleValueArgs NAME) @@ -105,28 +194,41 @@ function(exp_process_runtime_dependencies) 
OUT_DEP_TARGET dep_dep_targets ) list(APPEND runtime_deps ${dep_target_runtime_deps}) - list(APPEND dep_targets ${dep_dep_targets}) endforeach () - set(copy_commands COMMAND ${CMAKE_COMMAND} -E make_directory $) - foreach(r ${runtime_deps}) - list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${r} $) - endforeach() - if (NOT "${copy_commands}" STREQUAL "") - set(custom_target_name ${arg_NAME}.CopyRuntimeDeps) - add_custom_target( - ${custom_target_name} - ${copy_commands} - ) + if (runtime_deps) + list(REMOVE_DUPLICATES runtime_deps) + endif () - add_dependencies(${arg_NAME} ${custom_target_name}) - foreach (t ${dep_targets}) - add_dependencies(${custom_target_name} ${t}) - endforeach () + foreach (r ${runtime_deps}) + set(referenced "") + if ("${r}" MATCHES "^\\$$") + set(referenced ${CMAKE_MATCH_1}) + endif () - set_target_properties(${custom_target_name} PROPERTIES FOLDER ${AUX_TARGETS_FOLDER}) - endif () - if (NOT arg_NOT_INSTALL AND NOT "${runtime_deps}" STREQUAL "") + if (referenced AND TARGET ${referenced}) + set(producer "") + get_target_property(referenced_imported ${referenced} IMPORTED) + if (NOT referenced_imported) + set(producer ${referenced}) + endif () + + exp_add_runtime_dep_copy( + KEY ${r} + SRC ${r} + PRODUCER ${producer} + OUTPUT_TARGET copy_target + ) + add_dependencies(${arg_NAME} ${copy_target}) + else () + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_FILES_${SUB_PROJECT_NAME} ${r}) + endif () + endforeach () + + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_CONSUMERS_${SUB_PROJECT_NAME} ${arg_NAME}) + exp_schedule_dist_assets_finalize() + + if (NOT arg_NOT_INSTALL AND runtime_deps) install( FILES ${runtime_deps} DESTINATION ${SUB_PROJECT_NAME}/Binaries ) @@ -160,7 +262,7 @@ function(exp_add_resources_copy_command) OUTPUT_DST dst ) - list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} $/${dst}) + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_RESOURCES_${SUB_PROJECT_NAME} 
"${src}->${dst}") cmake_path(SET dst_path NORMALIZE "${SUB_PROJECT_NAME}/Binaries/${dst}") cmake_path(GET dst_path PARENT_PATH dst_dir) @@ -169,13 +271,8 @@ function(exp_add_resources_copy_command) endif () endforeach() - set(copy_res_target_name ${arg_NAME}.CopyRes) - add_custom_target( - ${copy_res_target_name} - ${copy_commands} - ) - set_target_properties(${copy_res_target_name} PROPERTIES FOLDER ${AUX_TARGETS_FOLDER}) - add_dependencies(${arg_NAME} ${copy_res_target_name}) + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_CONSUMERS_${SUB_PROJECT_NAME} ${arg_NAME}) + exp_schedule_dist_assets_finalize() endfunction() function(exp_gather_target_libs) From 32494b23d3b2e40442df9607765ead520a5a6d09 Mon Sep 17 00:00:00 2001 From: FlyAntNotDown <461425614@qq.com> Date: Sun, 28 Jun 2026 02:04:09 +0800 Subject: [PATCH 7/7] fix: build dxc on newer clang by suppressing invalid-specialization DXC's StringRef.h specializes std::is_nothrow_constructible, which recent Apple Clang on the macOS-latest runner rejects as -Winvalid-specialization (an error by default), breaking the Configure CMake step. Pass -Wno-invalid-specialization (guarded by -Wno-unknown-warning-option for older clang) so the pinned source compiles while keeping its behavior. --- ThirdParty/ConanRecipes/dxc/conanfile.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ThirdParty/ConanRecipes/dxc/conanfile.py b/ThirdParty/ConanRecipes/dxc/conanfile.py index 0f4cf8e0..98b2a1db 100644 --- a/ThirdParty/ConanRecipes/dxc/conanfile.py +++ b/ThirdParty/ConanRecipes/dxc/conanfile.py @@ -38,6 +38,12 @@ def source(self): def generate(self): cmake_toolchain = CMakeToolchain(self, generator="Ninja") + # DXC's StringRef.h specializes std::is_nothrow_constructible (an upstream "HLSL Change"). Newer Clang flags + # specializing a standard library entity as -Winvalid-specialization, which is an error by default, so recent + # Apple Clang fails to build the pinned source. 
Suppress the diagnostic (the specialization still takes effect, + # preserving upstream behavior); -Wno-unknown-warning-option keeps older Clang that lacks the flag happy. + if str(self.settings.compiler) in ("clang", "apple-clang"): + cmake_toolchain.extra_cxxflags.extend(["-Wno-unknown-warning-option", "-Wno-invalid-specialization"]) cmake_toolchain.generate() deps = CMakeDeps(self)