diff --git a/CMake/Target.cmake b/CMake/Target.cmake index a123096d..fc6cfe5f 100644 --- a/CMake/Target.cmake +++ b/CMake/Target.cmake @@ -86,6 +86,95 @@ function(exp_gather_target_runtime_dependencies_recurse) set(${arg_OUT_DEP_TARGET} ${result_dep_target} PARENT_SCOPE) endfunction() +# Every runtime file lands in the single shared per-sub-project Binaries directory. To keep the copies generator-agnostic +# and race-free, each destination is owned by exactly one copy step rather than being re-copied by every consumer: +# - files referenced through a target ($<TARGET_FILE:tgt>) get a per-file owner created in the consumer's scope, so the +# generator expression resolves where the target is visible (imported third-party targets such as Qt are +# directory-scoped and would be invisible in a global aggregate target). A first-party owner additionally waits on its +# producing target; merging these into one target is impossible because a build tool such as MirrorTool consumes a dll +# while another dll's producer transitively depends on the tool, which would close a build cycle; +# - prebuilt third-party files given as plain paths, plus resources, carry no target and are batched (deduplicated) into +# one per-sub-project assets target whose copies run sequentially (see exp_finalize_dist_assets). 
+function(exp_add_runtime_dep_copy) + set(options "") + set(singleValueArgs KEY SRC PRODUCER OUTPUT_TARGET) + set(multiValueArgs "") + cmake_parse_arguments(arg "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) + + string(MAKE_C_IDENTIFIER "${arg_KEY}" key_id) + set(registry_property EXP_RUNTIME_DEP_COPY_${SUB_PROJECT_NAME}_${key_id}) + + get_property(copy_target GLOBAL PROPERTY ${registry_property}) + if (NOT copy_target) + exp_get_runtime_output_dir(OUTPUT out_dir) + set(copy_target ${SUB_PROJECT_NAME}.CopyDll.${key_id}) + add_custom_target( + ${copy_target} + COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${arg_SRC} ${out_dir} + ) + set_target_properties(${copy_target} PROPERTIES FOLDER ${AUX_TARGETS_FOLDER}) + if (arg_PRODUCER) + add_dependencies(${copy_target} ${arg_PRODUCER}) + endif () + set_property(GLOBAL PROPERTY ${registry_property} ${copy_target}) + endif () + + set(${arg_OUTPUT_TARGET} ${copy_target} PARENT_SCOPE) +endfunction() + +function(exp_schedule_dist_assets_finalize) + get_property(scheduled GLOBAL PROPERTY EXP_DIST_ASSETS_SCHEDULED_${SUB_PROJECT_NAME}) + if (NOT scheduled) + set_property(GLOBAL PROPERTY EXP_DIST_ASSETS_SCHEDULED_${SUB_PROJECT_NAME} TRUE) + cmake_language(DEFER DIRECTORY ${CMAKE_SOURCE_DIR} CALL exp_finalize_dist_assets "${SUB_PROJECT_NAME}") + endif () +endfunction() + +function(exp_finalize_dist_assets sub_project) + get_property(asset_files GLOBAL PROPERTY EXP_DIST_ASSET_FILES_${sub_project}) + get_property(asset_resources GLOBAL PROPERTY EXP_DIST_ASSET_RESOURCES_${sub_project}) + get_property(consumers GLOBAL PROPERTY EXP_DIST_ASSET_CONSUMERS_${sub_project}) + + if (NOT asset_files AND NOT asset_resources) + return() + endif () + + if (with_multi_config_generator) + set(out_dir ${CMAKE_BINARY_DIR}/Dist/$/${sub_project}/Binaries) + else () + set(out_dir ${CMAKE_BINARY_DIR}/Dist/${sub_project}/Binaries) + endif () + + set(copy_commands COMMAND 
${CMAKE_COMMAND} -E make_directory ${out_dir}) + if (asset_files) + list(REMOVE_DUPLICATES asset_files) + foreach (f ${asset_files}) + list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${f} ${out_dir}) + endforeach () + endif () + if (asset_resources) + list(REMOVE_DUPLICATES asset_resources) + foreach (entry ${asset_resources}) + string(REPLACE "->" ";" pair "${entry}") + list(GET pair 0 src) + list(GET pair 1 dst) + list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${out_dir}/${dst}) + endforeach () + endif () + + set(copy_target ${sub_project}.CopyDistAssets) + add_custom_target(${copy_target} ${copy_commands}) + set_target_properties(${copy_target} PROPERTIES FOLDER ${sub_project}/Aux) + + if (consumers) + list(REMOVE_DUPLICATES consumers) + foreach (consumer ${consumers}) + add_dependencies(${consumer} ${copy_target}) + endforeach () + endif () +endfunction() + function(exp_process_runtime_dependencies) set(options NOT_INSTALL) set(singleValueArgs NAME) @@ -105,28 +194,41 @@ function(exp_process_runtime_dependencies) OUT_DEP_TARGET dep_dep_targets ) list(APPEND runtime_deps ${dep_target_runtime_deps}) - list(APPEND dep_targets ${dep_dep_targets}) endforeach () - set(copy_commands COMMAND ${CMAKE_COMMAND} -E make_directory $) - foreach(r ${runtime_deps}) - list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${r} $) - endforeach() - if (NOT "${copy_commands}" STREQUAL "") - set(custom_target_name ${arg_NAME}.CopyRuntimeDeps) - add_custom_target( - ${custom_target_name} - ${copy_commands} - ) + if (runtime_deps) + list(REMOVE_DUPLICATES runtime_deps) + endif () - add_dependencies(${arg_NAME} ${custom_target_name}) - foreach (t ${dep_targets}) - add_dependencies(${custom_target_name} ${t}) - endforeach () + foreach (r ${runtime_deps}) + set(referenced "") + if ("${r}" MATCHES "^\\$$") + set(referenced ${CMAKE_MATCH_1}) + endif () - set_target_properties(${custom_target_name} PROPERTIES FOLDER 
${AUX_TARGETS_FOLDER}) - endif () - if (NOT arg_NOT_INSTALL AND NOT "${runtime_deps}" STREQUAL "") + if (referenced AND TARGET ${referenced}) + set(producer "") + get_target_property(referenced_imported ${referenced} IMPORTED) + if (NOT referenced_imported) + set(producer ${referenced}) + endif () + + exp_add_runtime_dep_copy( + KEY ${r} + SRC ${r} + PRODUCER ${producer} + OUTPUT_TARGET copy_target + ) + add_dependencies(${arg_NAME} ${copy_target}) + else () + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_FILES_${SUB_PROJECT_NAME} ${r}) + endif () + endforeach () + + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_CONSUMERS_${SUB_PROJECT_NAME} ${arg_NAME}) + exp_schedule_dist_assets_finalize() + + if (NOT arg_NOT_INSTALL AND runtime_deps) install( FILES ${runtime_deps} DESTINATION ${SUB_PROJECT_NAME}/Binaries ) @@ -160,7 +262,7 @@ function(exp_add_resources_copy_command) OUTPUT_DST dst ) - list(APPEND copy_commands COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} $/${dst}) + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_RESOURCES_${SUB_PROJECT_NAME} "${src}->${dst}") cmake_path(SET dst_path NORMALIZE "${SUB_PROJECT_NAME}/Binaries/${dst}") cmake_path(GET dst_path PARENT_PATH dst_dir) @@ -169,13 +271,8 @@ function(exp_add_resources_copy_command) endif () endforeach() - set(copy_res_target_name ${arg_NAME}.CopyRes) - add_custom_target( - ${copy_res_target_name} - ${copy_commands} - ) - set_target_properties(${copy_res_target_name} PROPERTIES FOLDER ${AUX_TARGETS_FOLDER}) - add_dependencies(${arg_NAME} ${copy_res_target_name}) + set_property(GLOBAL APPEND PROPERTY EXP_DIST_ASSET_CONSUMERS_${SUB_PROJECT_NAME} ${arg_NAME}) + exp_schedule_dist_assets_finalize() endfunction() function(exp_gather_target_libs) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h index 0b583ebb..ed512d16 100644 --- 
a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/CommandRecorder.h @@ -20,6 +20,8 @@ namespace RHI::DirectX12 { ~DX12CommandRecorder() override; void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& inBeginInfo) override; @@ -38,6 +40,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -60,6 +64,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* inPipeline) override; @@ -82,6 +88,8 @@ namespace RHI::DirectX12 { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* inPipeline) override; @@ -95,6 +103,10 @@ namespace RHI::DirectX12 { void SetPrimitiveTopology(PrimitiveTopology inPrimitiveTopology) override; void SetBlendConstant(const float* inConstants) override; void SetStencilReference(uint32_t inReference) override; + void DrawIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void DrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void MultiDrawIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; + void MultiDrawIndexedIndirect(Buffer* 
inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; void EndPass() override; private: diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h index 295daae1..e6e39994 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h @@ -252,6 +252,7 @@ namespace RHI::DirectX12 { ECIMPL_ITEM(BufferState::shaderReadOnly, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) ECIMPL_ITEM(BufferState::storage, D3D12_RESOURCE_STATE_COMMON) ECIMPL_ITEM(BufferState::rwStorage, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + ECIMPL_ITEM(BufferState::indirect, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT) ECIMPL_END(D3D12_RESOURCE_STATES) ECIMPL_BEGIN(TextureDimension, D3D12_RESOURCE_DIMENSION) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h index 3bb94b96..35692a15 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Device.h @@ -95,6 +95,8 @@ namespace RHI::DirectX12 { TextureSubResourceCopyFootprint GetTextureSubResourceCopyFootprint(const Texture& texture, const TextureSubResourceInfo& subResourceInfo) override; ID3D12Device* GetNative() const; + ID3D12CommandSignature* GetDrawIndirectCommandSignature() const; + ID3D12CommandSignature* GetDrawIndexedIndirectCommandSignature() const; Common::UniquePtr AllocateRtvDescriptor() const; Common::UniquePtr AllocateCbvSrvUavDescriptor() const; Common::UniquePtr AllocateSamplerDescriptor() const; @@ -105,6 +107,7 @@ namespace RHI::DirectX12 { void CreateNativeQueues(const DeviceCreateInfo& inCreateInfo); void QueryNativeDescriptorSize(); void CreateDescriptorPools(); + void CreateDrawIndirectCommandSignatures(); #if BUILD_CONFIG_DEBUG void RegisterNativeDebugLayerExceptionHandler(); void 
UnregisterNativeDebugLayerExceptionHandler(); @@ -122,5 +125,7 @@ namespace RHI::DirectX12 { Common::UniquePtr samplerDescriptorPool; Common::UniquePtr dsvDescriptorPool; ComPtr nativeDevice; + ComPtr drawIndirectCommandSignature; + ComPtr drawIndexedIndirectCommandSignature; }; } diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h index 7a9a911a..2e875889 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Gpu.h @@ -20,6 +20,8 @@ namespace RHI::DirectX12 { ~DX12Gpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& inCreateInfo) override; DX12Instance& GetInstance() const override; diff --git a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp index 4d25b595..93b0b399 100644 --- a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp @@ -80,7 +80,7 @@ namespace RHI::DirectX12 { void* data; const CD3DX12_RANGE range(inOffset, inOffset + inLength); Assert(SUCCEEDED(nativeResource->Map(0, &range, &data))); - return data; + return static_cast(data) + inOffset; } void DX12Buffer::Unmap() diff --git a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp index f60285cf..9dd0c129 100644 --- a/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-DirectX12/Src/CommandRecorder.cpp @@ -48,12 +48,14 @@ namespace RHI::DirectX12 { { const auto aspectLayout = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT + // The buffer is laid out as the full sub-resource footprint (so the slice stride is RowPitch * full height); + // the copied window is selected by the box passed to CopyTextureRegion, not by shrinking this 
footprint. D3D12_PLACED_SUBRESOURCE_FOOTPRINT bufferLayout; bufferLayout.Offset = copyInfo.bufferOffset; bufferLayout.Footprint.Format = texture.GetNative()->GetDesc().Format; - bufferLayout.Footprint.Width = copyInfo.copyRegion.x; - bufferLayout.Footprint.Height = copyInfo.copyRegion.y; - bufferLayout.Footprint.Depth = copyInfo.copyRegion.z; + bufferLayout.Footprint.Width = aspectLayout.extent.x; + bufferLayout.Footprint.Height = aspectLayout.extent.y; + bufferLayout.Footprint.Depth = aspectLayout.extent.z; bufferLayout.Footprint.RowPitch = aspectLayout.rowPitch; return { buffer.GetNative(), bufferLayout }; } @@ -71,6 +73,12 @@ namespace RHI::DirectX12 { } } +namespace RHI::DirectX12::Internal { + // PIX_EVENT_ANSI_VERSION: marks the BeginEvent payload as a plain ANSI string, decoded by RenderDoc/PIX without the + // WinPixEventRuntime dependency. + constexpr UINT pixEventAnsiVersion = 1; +} + namespace RHI::DirectX12 { DX12CopyPassCommandRecorder::DX12CopyPassCommandRecorder(DX12Device& inDevice, DX12CommandRecorder& inCmdRecorder, DX12CommandBuffer& inCmdBuffer) : device(inDevice) @@ -86,6 +94,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12CopyPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12CopyPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12CopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { const auto* srcBuffer = static_cast(src); @@ -168,6 +186,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12ComputePassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12ComputePassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12ComputePassCommandRecorder::SetPipeline(ComputePipeline* inPipeline) { computePipeline = 
static_cast(inPipeline); @@ -251,6 +279,16 @@ namespace RHI::DirectX12 { commandRecorder.ResourceBarrier(inBarrier); } + void DX12RasterPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void DX12RasterPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void DX12RasterPassCommandRecorder::SetPipeline(RasterPipeline* inPipeline) { rasterPipeline = static_cast(inPipeline); @@ -329,6 +367,28 @@ namespace RHI::DirectX12 { commandBuffer.GetNativeCmdList()->OMSetStencilRef(inReference); } + void DX12RasterPassCommandRecorder::DrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndirect(inIndirectBuffer, inOffset, 1); + } + + void DX12RasterPassCommandRecorder::DrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndexedIndirect(inIndirectBuffer, inOffset, 1); + } + + void DX12RasterPassCommandRecorder::MultiDrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + commandBuffer.GetNativeCmdList()->ExecuteIndirect(device.GetDrawIndirectCommandSignature(), inDrawCount, indirectBuffer->GetNative(), inOffset, nullptr, 0); + } + + void DX12RasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + commandBuffer.GetNativeCmdList()->ExecuteIndirect(device.GetDrawIndexedIndirectCommandSignature(), inDrawCount, indirectBuffer->GetNative(), inOffset, nullptr, 0); + } + void DX12RasterPassCommandRecorder::EndPass() { } @@ -383,6 +443,20 @@ namespace RHI::DirectX12 { commandBuffer.GetNativeCmdList()->ResourceBarrier(1, &resourceBarrier); } + void DX12CommandRecorder::BeginMarker(const std::string& inLabel) + { +#if BUILD_CONFIG_DEBUG + commandBuffer.GetNativeCmdList()->BeginEvent(Internal::pixEventAnsiVersion, 
inLabel.c_str(), static_cast(inLabel.size() + 1)); +#endif + } + + void DX12CommandRecorder::EndMarker() + { +#if BUILD_CONFIG_DEBUG + commandBuffer.GetNativeCmdList()->EndEvent(); +#endif + } + Common::UniquePtr DX12CommandRecorder::BeginCopyPass() { return Common::UniquePtr(new DX12CopyPassCommandRecorder(device, *this, commandBuffer)); diff --git a/Engine/Source/RHI-DirectX12/Src/Device.cpp b/Engine/Source/RHI-DirectX12/Src/Device.cpp index efe625f7..eb07ff72 100644 --- a/Engine/Source/RHI-DirectX12/Src/Device.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Device.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include namespace RHI::DirectX12 { @@ -152,6 +153,7 @@ namespace RHI::DirectX12 { CreateNativeQueues(inCreateInfo); QueryNativeDescriptorSize(); CreateDescriptorPools(); + CreateDrawIndirectCommandSignatures(); #if BUILD_CONFIG_DEBUG RegisterNativeDebugLayerExceptionHandler(); #endif @@ -267,7 +269,8 @@ namespace RHI::DirectX12 { const auto createInfo = texture.GetCreateInfo(); const auto nativeResourceDesc = dx12Texture.GetNative()->GetDesc(); - const size_t nativeSubResourceIndex = D3D12CalcSubresource(subResourceInfo.mipLevel, subResourceInfo.arrayLayer, 0, 1, 1); + const auto arraySize = createInfo.dimension == TextureDimension::t3D ? 
1 : createInfo.depthOrArraySize; + const size_t nativeSubResourceIndex = D3D12CalcSubresource(subResourceInfo.mipLevel, subResourceInfo.arrayLayer, 0, createInfo.mipLevels, arraySize); D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; nativeDevice->GetCopyableFootprints(&nativeResourceDesc, nativeSubResourceIndex, 1, 0, &footprint, nullptr, nullptr, nullptr); @@ -286,6 +289,16 @@ namespace RHI::DirectX12 { return nativeDevice.Get(); } + ID3D12CommandSignature* DX12Device::GetDrawIndirectCommandSignature() const + { + return drawIndirectCommandSignature.Get(); + } + + ID3D12CommandSignature* DX12Device::GetDrawIndexedIndirectCommandSignature() const + { + return drawIndexedIndirectCommandSignature.Get(); + } + Common::UniquePtr DX12Device::AllocateRtvDescriptor() const { return rtvDescriptorPool->Allocate(); @@ -355,6 +368,28 @@ namespace RHI::DirectX12 { dsvDescriptorPool = Common::MakeUnique(*this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, nativeDsvDescriptorSize, 16); } + void DX12Device::CreateDrawIndirectCommandSignatures() + { + const auto createSignature = [this](const D3D12_INDIRECT_ARGUMENT_TYPE inArgumentType, const uint32_t inStride) -> ComPtr { + D3D12_INDIRECT_ARGUMENT_DESC argumentDesc {}; + argumentDesc.Type = inArgumentType; + + D3D12_COMMAND_SIGNATURE_DESC commandSignatureDesc {}; + commandSignatureDesc.ByteStride = inStride; + commandSignatureDesc.NumArgumentDescs = 1; + commandSignatureDesc.pArgumentDescs = &argumentDesc; + commandSignatureDesc.NodeMask = 0; + + // A command signature carrying only a draw argument changes no root parameters, so no root signature is required. 
+ ComPtr commandSignature; + Assert(SUCCEEDED(nativeDevice->CreateCommandSignature(&commandSignatureDesc, nullptr, IID_PPV_ARGS(&commandSignature)))); + return commandSignature; + }; + + drawIndirectCommandSignature = createSignature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, sizeof(DrawIndirectArguments)); + drawIndexedIndirectCommandSignature = createSignature(D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, sizeof(DrawIndexedIndirectArguments)); + } + #if BUILD_CONFIG_DEBUG void DX12Device::RegisterNativeDebugLayerExceptionHandler() { diff --git a/Engine/Source/RHI-DirectX12/Src/Gpu.cpp b/Engine/Source/RHI-DirectX12/Src/Gpu.cpp index f80bed93..086d2cb6 100644 --- a/Engine/Source/RHI-DirectX12/Src/Gpu.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Gpu.cpp @@ -27,6 +27,45 @@ namespace RHI::DirectX12 { return property; } + FeatureFlags DX12Gpu::GetFeatures() + { + return FeatureBits::samplerAnisotropy + | FeatureBits::textureCompressionBc + | FeatureBits::timestampQuery + | FeatureBits::multiDrawIndirect + | FeatureBits::drawIndirectFirstInstance; + } + + GpuLimits DX12Gpu::GetLimits() + { + // D3D12 has no bind-group concept and its resource-size cap is tier-dependent, so both use conservative values. 
+ constexpr uint32_t maxBindGroups = 8; + constexpr uint64_t maxResourceSize = 1ull << 31; + + GpuLimits result {}; + result.maxTextureDimension1D = D3D12_REQ_TEXTURE1D_U_DIMENSION; + result.maxTextureDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; + result.maxTextureDimension3D = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; + result.maxTextureArrayLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; + result.maxBindGroups = maxBindGroups; + result.maxVertexBuffers = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; + result.maxVertexAttributes = D3D12_VS_INPUT_REGISTER_COUNT; + result.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; + result.maxUniformBufferBindingSize = static_cast(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT) * 16; + result.maxStorageBufferBindingSize = maxResourceSize; + result.maxBufferSize = maxResourceSize; + result.minUniformBufferOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; + result.minStorageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; + result.optimalBufferCopyOffsetAlignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + result.optimalBufferCopyRowPitchAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; + result.maxComputeWorkgroupSizeX = D3D12_CS_THREAD_GROUP_MAX_X; + result.maxComputeWorkgroupSizeY = D3D12_CS_THREAD_GROUP_MAX_Y; + result.maxComputeWorkgroupSizeZ = D3D12_CS_THREAD_GROUP_MAX_Z; + result.maxComputeInvocationsPerWorkgroup = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + result.maxComputeWorkgroupsPerDimension = D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; + return result; + } + DX12Instance& DX12Gpu::GetInstance() const { return instance; diff --git a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h index c45d5289..0d6b007e 100644 --- a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h +++ b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/CommandRecorder.h @@ -16,6 +16,8 @@ namespace RHI::Dummy { 
~DummyCommandRecorder() override; void ResourceBarrier(const Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& beginInfo) override; @@ -33,6 +35,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -50,6 +54,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* pipeline) override; @@ -66,6 +72,8 @@ namespace RHI::Dummy { // CommonCommandRecorder void ResourceBarrier(const RHI::Barrier& barrier) override; + void BeginMarker(const std::string& label) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* pipeline) override; @@ -79,6 +87,10 @@ namespace RHI::Dummy { void SetPrimitiveTopology(PrimitiveTopology primitiveTopology) override; void SetBlendConstant(const float*/*[4]*/ constants) override; void SetStencilReference(uint32_t reference) override; + void DrawIndirect(Buffer* indirectBuffer, size_t offset) override; + void DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) override; + void MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) override; + void MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) override; void EndPass() override; }; } diff --git a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h index 01912270..08b72d74 
100644 --- a/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h +++ b/Engine/Source/RHI-Dummy/Include/RHI/Dummy/Gpu.h @@ -14,6 +14,8 @@ namespace RHI::Dummy { explicit DummyGpu(DummyInstance& inInstance); ~DummyGpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& createInfo) override; DummyInstance& GetInstance() const override; diff --git a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp index f981c7c2..4d891b08 100644 --- a/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Dummy/Src/CommandRecorder.cpp @@ -17,6 +17,14 @@ namespace RHI::Dummy { { } + void DummyCopyPassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyCopyPassCommandRecorder::EndMarker() + { + } + void DummyCopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { } @@ -47,6 +55,14 @@ namespace RHI::Dummy { { } + void DummyComputePassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyComputePassCommandRecorder::EndMarker() + { + } + void DummyComputePassCommandRecorder::SetPipeline(ComputePipeline* pipeline) { } @@ -73,6 +89,14 @@ namespace RHI::Dummy { { } + void DummyRasterPassCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyRasterPassCommandRecorder::EndMarker() + { + } + void DummyRasterPassCommandRecorder::SetPipeline(RasterPipeline* pipeline) { } @@ -117,6 +141,22 @@ namespace RHI::Dummy { { } + void DummyRasterPassCommandRecorder::DrawIndirect(Buffer* indirectBuffer, size_t offset) + { + } + + void DummyRasterPassCommandRecorder::DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) + { + } + + void DummyRasterPassCommandRecorder::MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) + { + } + + void 
DummyRasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) + { + } + void DummyRasterPassCommandRecorder::EndPass() { } @@ -132,6 +172,14 @@ namespace RHI::Dummy { { } + void DummyCommandRecorder::BeginMarker(const std::string& label) + { + } + + void DummyCommandRecorder::EndMarker() + { + } + Common::UniquePtr DummyCommandRecorder::BeginCopyPass() { return Common::UniquePtr(new DummyCopyPassCommandRecorder(dummyCommandBuffer)); diff --git a/Engine/Source/RHI-Dummy/Src/Gpu.cpp b/Engine/Source/RHI-Dummy/Src/Gpu.cpp index 5409865f..d02abc00 100644 --- a/Engine/Source/RHI-Dummy/Src/Gpu.cpp +++ b/Engine/Source/RHI-Dummy/Src/Gpu.cpp @@ -18,6 +18,16 @@ namespace RHI::Dummy { return {}; } + FeatureFlags DummyGpu::GetFeatures() + { + return FeatureFlags::null; + } + + GpuLimits DummyGpu::GetLimits() + { + return {}; + } + Common::UniquePtr DummyGpu::RequestDevice(const DeviceCreateInfo& createInfo) { return { new DummyDevice(*this, createInfo) }; diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h index f3586f4e..8a679008 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Buffer.h @@ -34,5 +34,8 @@ namespace RHI::Vulkan { VkBuffer nativeBuffer; VmaAllocation nativeAllocation; BufferUsageFlags usages; + MapMode mapMode; + size_t mapOffset; + size_t mapLength; }; } diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h index cdad3f56..8a297530 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/CommandRecorder.h @@ -22,6 +22,8 @@ namespace RHI::Vulkan { ~VulkanCommandRecorder() override; void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; 
Common::UniquePtr BeginCopyPass() override; Common::UniquePtr BeginComputePass() override; Common::UniquePtr BeginRasterPass(const RasterPassBeginInfo& inBeginInfo) override; @@ -40,6 +42,8 @@ namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // CopyPassCommandRecorder void CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) override; @@ -62,6 +66,8 @@ namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // ComputePassCommandRecorder void SetPipeline(ComputePipeline* inPipeline) override; @@ -84,6 +90,8 @@ namespace RHI::Vulkan { // CommonCommandRecorder void ResourceBarrier(const Barrier& inBarrier) override; + void BeginMarker(const std::string& inLabel) override; + void EndMarker() override; // RasterPassCommandRecorder void SetPipeline(RasterPipeline* inPipeline) override; @@ -97,6 +105,10 @@ namespace RHI::Vulkan { void SetPrimitiveTopology(PrimitiveTopology inPrimitiveTopology) override; void SetBlendConstant(const float* inConstants) override; void SetStencilReference(uint32_t inReference) override; + void DrawIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void DrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset) override; + void MultiDrawIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; + void MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, size_t inOffset, size_t inDrawCount) override; void EndPass() override; private: diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h index a2c62f3b..ff34c927 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Gpu.h @@ -17,6 +17,8 @@ 
namespace RHI::Vulkan { ~VulkanGpu() override; GpuProperty GetProperty() override; + FeatureFlags GetFeatures() override; + GpuLimits GetLimits() override; Common::UniquePtr RequestDevice(const DeviceCreateInfo& inCreateInfo) override; VulkanInstance& GetInstance() const override; diff --git a/Engine/Source/RHI-Vulkan/Src/Buffer.cpp b/Engine/Source/RHI-Vulkan/Src/Buffer.cpp index eba554e0..20391e49 100644 --- a/Engine/Source/RHI-Vulkan/Src/Buffer.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Buffer.cpp @@ -16,6 +16,9 @@ namespace RHI::Vulkan { : Buffer(inCreateInfo) , device(inDevice) , usages(inCreateInfo.usages) + , mapMode(MapMode::read) + , mapOffset(0) + , mapLength(0) { CreateNativeBuffer(inCreateInfo); TransitionToInitState(inCreateInfo); @@ -28,15 +31,25 @@ namespace RHI::Vulkan { } } - void* VulkanBuffer::Map(MapMode inMapMode, size_t inOffset, size_t inLength) + void* VulkanBuffer::Map(const MapMode inMapMode, const size_t inOffset, const size_t inLength) { + mapMode = inMapMode; + mapOffset = inOffset; + mapLength = inLength; + void* data; Assert(vmaMapMemory(device.GetNativeAllocator(), nativeAllocation, &data) == VK_SUCCESS); - return data; + if (inMapMode == MapMode::read) { + Assert(vmaInvalidateAllocation(device.GetNativeAllocator(), nativeAllocation, inOffset, inLength) == VK_SUCCESS); + } + return static_cast(data) + inOffset; } void VulkanBuffer::Unmap() { + if (mapMode == MapMode::write) { + Assert(vmaFlushAllocation(device.GetNativeAllocator(), nativeAllocation, mapOffset, mapLength) == VK_SUCCESS); + } vmaUnmapMemory(device.GetNativeAllocator(), nativeAllocation); } @@ -55,8 +68,10 @@ namespace RHI::Vulkan { VmaAllocationCreateInfo allocInfo = {}; allocInfo.usage = VMA_MEMORY_USAGE_AUTO; - if (inCreateInfo.usages | BufferUsageBits::mapWrite) { + if (inCreateInfo.usages & BufferUsageBits::mapWrite) { allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + } else if (inCreateInfo.usages & BufferUsageBits::mapRead) { + 
allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; } Assert(vmaCreateBuffer(device.GetNativeAllocator(), &bufferInfo, &allocInfo, &nativeBuffer, &nativeAllocation, nullptr) == VK_SUCCESS); diff --git a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp index 9a1b1233..dfb093ba 100644 --- a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp @@ -27,7 +27,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, VK_ACCESS_TRANSFER_WRITE_BIT }, { BufferState::shaderReadOnly, VK_ACCESS_SHADER_READ_BIT }, { BufferState::storage, VK_ACCESS_SHADER_READ_BIT }, - { BufferState::rwStorage, VK_ACCESS_SHADER_WRITE_BIT } + { BufferState::rwStorage, VK_ACCESS_SHADER_WRITE_BIT }, + { BufferState::indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT } }; return map.at(inState); } @@ -41,8 +42,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } - + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::indirect, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT } }; return map.at(inState); } @@ -56,7 +57,8 @@ namespace RHI::Vulkan { { BufferState::copyDst, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::indirect, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT } }; return map.at(inState); } @@ -137,16 +139,14 @@ namespace RHI::Vulkan { static VkBufferImageCopy GetNativeBufferImageCopy(Device& device, const Texture& texture, const BufferTextureCopyInfo& copyInfo) { - const auto aspectLayout = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT - const auto createInfo = texture.GetCreateInfo(); - - const auto linearRowPitch = GetBytesPerPixel(createInfo.format) * copyInfo.copyRegion.x; - const auto linearSlicePitch = linearRowPitch * copyInfo.copyRegion.y; + const auto footprint = device.GetTextureSubResourceCopyFootprint(texture, copyInfo.textureSubResource); // NOLINT VkBufferImageCopy result {}; result.bufferOffset = copyInfo.bufferOffset; - result.bufferRowLength = aspectLayout.rowPitch == linearRowPitch ? 0 : aspectLayout.rowPitch; - result.bufferImageHeight = aspectLayout.slicePitch == linearSlicePitch ? 0 : aspectLayout.slicePitch; + // bufferRowLength/bufferImageHeight are measured in texels and describe how the linear buffer data is strided; + // they mirror the full sub-resource footprint, while imageExtent selects the copied window within it. 
+ result.bufferRowLength = static_cast(footprint.rowPitch / footprint.bytesPerPixel); + result.bufferImageHeight = footprint.extent.y; result.imageOffset = { static_cast(copyInfo.textureOrigin.x), static_cast(copyInfo.textureOrigin.y), static_cast(copyInfo.textureOrigin.z) }; result.imageExtent = { copyInfo.copyRegion.x, copyInfo.copyRegion.y, copyInfo.copyRegion.z }; result.imageSubresource = GetNativeImageSubResourceLayers(copyInfo.textureSubResource); @@ -209,6 +209,26 @@ namespace RHI::Vulkan { } } + void VulkanCommandRecorder::BeginMarker(const std::string& inLabel) + { +#if BUILD_CONFIG_DEBUG + VkDebugUtilsLabelEXT labelInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT }; + labelInfo.pLabelName = inLabel.c_str(); + labelInfo.color[0] = labelInfo.color[1] = labelInfo.color[2] = labelInfo.color[3] = 1.0f; + + auto* pfn = device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdBeginDebugUtilsLabelEXT"); + pfn(commandBuffer.GetNative(), &labelInfo); +#endif + } + + void VulkanCommandRecorder::EndMarker() + { +#if BUILD_CONFIG_DEBUG + auto* pfn = device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdEndDebugUtilsLabelEXT"); + pfn(commandBuffer.GetNative()); +#endif + } + Common::UniquePtr VulkanCommandRecorder::BeginCopyPass() { return Common::UniquePtr(new VulkanCopyPassCommandRecorder(device, *this, commandBuffer)); @@ -243,6 +263,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanCopyPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanCopyPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanCopyPassCommandRecorder::CopyBufferToBuffer(Buffer* src, Buffer* dst, const BufferCopyInfo& copyInfo) { const auto* srcBuffer = static_cast(src); @@ -308,6 +338,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanComputePassCommandRecorder::BeginMarker(const std::string& 
inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanComputePassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanComputePassCommandRecorder::SetPipeline(ComputePipeline* inPipeline) { computePipeline = static_cast(inPipeline); @@ -408,6 +448,16 @@ namespace RHI::Vulkan { commandRecorder.ResourceBarrier(inBarrier); } + void VulkanRasterPassCommandRecorder::BeginMarker(const std::string& inLabel) + { + commandRecorder.BeginMarker(inLabel); + } + + void VulkanRasterPassCommandRecorder::EndMarker() + { + commandRecorder.EndMarker(); + } + void VulkanRasterPassCommandRecorder::SetPipeline(RasterPipeline* inPipeline) { rasterPipeline = static_cast(inPipeline); @@ -496,6 +546,28 @@ namespace RHI::Vulkan { vkCmdSetStencilReference(commandBuffer.GetNative(), VK_STENCIL_FACE_FRONT_AND_BACK, inReference); } + void VulkanRasterPassCommandRecorder::DrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndirect(inIndirectBuffer, inOffset, 1); + } + + void VulkanRasterPassCommandRecorder::DrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset) + { + MultiDrawIndexedIndirect(inIndirectBuffer, inOffset, 1); + } + + void VulkanRasterPassCommandRecorder::MultiDrawIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + vkCmdDrawIndirect(commandBuffer.GetNative(), indirectBuffer->GetNative(), inOffset, inDrawCount, sizeof(DrawIndirectArguments)); + } + + void VulkanRasterPassCommandRecorder::MultiDrawIndexedIndirect(Buffer* inIndirectBuffer, const size_t inOffset, const size_t inDrawCount) + { + const auto* indirectBuffer = static_cast(inIndirectBuffer); + vkCmdDrawIndexedIndirect(commandBuffer.GetNative(), indirectBuffer->GetNative(), inOffset, inDrawCount, sizeof(DrawIndexedIndirectArguments)); + } + void VulkanRasterPassCommandRecorder::EndPass() { auto* pfn = 
device.GetGpu().GetInstance().FindOrGetTypedDynamicFuncPointer("vkCmdEndRenderingKHR"); diff --git a/Engine/Source/RHI-Vulkan/Src/Device.cpp b/Engine/Source/RHI-Vulkan/Src/Device.cpp index e9688802..1826cf85 100644 --- a/Engine/Source/RHI-Vulkan/Src/Device.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Device.cpp @@ -170,13 +170,19 @@ namespace RHI::Vulkan { TextureSubResourceCopyFootprint VulkanDevice::GetTextureSubResourceCopyFootprint(const Texture& texture, const TextureSubResourceInfo& subResourceInfo) { const auto& createInfo = texture.GetCreateInfo(); + const auto mipLevel = subResourceInfo.mipLevel; + const auto baseDepth = createInfo.dimension == TextureDimension::t3D ? createInfo.depthOrArraySize : 1; TextureSubResourceCopyFootprint result {}; - result.extent = { createInfo.width, createInfo.height, createInfo.dimension == TextureDimension::t3D ? createInfo.depthOrArraySize : 1 }; + result.extent = { + std::max(createInfo.width >> mipLevel, 1u), + std::max(createInfo.height >> mipLevel, 1u), + std::max(baseDepth >> mipLevel, 1u) + }; result.bytesPerPixel = GetBytesPerPixel(createInfo.format); result.rowPitch = result.bytesPerPixel * result.extent.x; result.slicePitch = result.rowPitch * result.extent.y; - result.totalBytes = result.bytesPerPixel * result.extent.x * result.extent.y * result.extent.z; + result.totalBytes = result.slicePitch * result.extent.z; return result; } diff --git a/Engine/Source/RHI-Vulkan/Src/Gpu.cpp b/Engine/Source/RHI-Vulkan/Src/Gpu.cpp index d155bb69..06bf4db0 100644 --- a/Engine/Source/RHI-Vulkan/Src/Gpu.cpp +++ b/Engine/Source/RHI-Vulkan/Src/Gpu.cpp @@ -2,6 +2,8 @@ // Created by johnk on 12/1/2022. 
// +#include + #include #include #include @@ -27,6 +29,63 @@ namespace RHI::Vulkan { return property; } + FeatureFlags VulkanGpu::GetFeatures() + { + VkPhysicalDeviceFeatures features; + vkGetPhysicalDeviceFeatures(nativePhysicalDevice, &features); + + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(nativePhysicalDevice, &properties); + + FeatureFlags result = FeatureFlags::null; + if (features.samplerAnisotropy) { result = result | FeatureBits::samplerAnisotropy; } + if (features.textureCompressionBC) { result = result | FeatureBits::textureCompressionBc; } + if (properties.limits.timestampComputeAndGraphics) { result = result | FeatureBits::timestampQuery; } + if (features.multiDrawIndirect) { result = result | FeatureBits::multiDrawIndirect; } + if (features.drawIndirectFirstInstance) { result = result | FeatureBits::drawIndirectFirstInstance; } + return result; + } + + GpuLimits VulkanGpu::GetLimits() + { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(nativePhysicalDevice, &properties); + const auto& nativeLimits = properties.limits; + + // Vulkan 1.0 has no direct max-buffer-size query, so the largest device-local heap is used as an approximation. 
+ VkPhysicalDeviceMemoryProperties memoryProperties; + vkGetPhysicalDeviceMemoryProperties(nativePhysicalDevice, &memoryProperties); + VkDeviceSize maxDeviceLocalHeapSize = 0; + for (uint32_t i = 0; i < memoryProperties.memoryHeapCount; i++) { + if ((memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { + maxDeviceLocalHeapSize = std::max(maxDeviceLocalHeapSize, memoryProperties.memoryHeaps[i].size); + } + } + + GpuLimits result {}; + result.maxTextureDimension1D = nativeLimits.maxImageDimension1D; + result.maxTextureDimension2D = nativeLimits.maxImageDimension2D; + result.maxTextureDimension3D = nativeLimits.maxImageDimension3D; + result.maxTextureArrayLayers = nativeLimits.maxImageArrayLayers; + result.maxBindGroups = nativeLimits.maxBoundDescriptorSets; + result.maxVertexBuffers = nativeLimits.maxVertexInputBindings; + result.maxVertexAttributes = nativeLimits.maxVertexInputAttributes; + result.maxColorAttachments = nativeLimits.maxColorAttachments; + result.maxUniformBufferBindingSize = nativeLimits.maxUniformBufferRange; + result.maxStorageBufferBindingSize = nativeLimits.maxStorageBufferRange; + result.maxBufferSize = maxDeviceLocalHeapSize; + result.minUniformBufferOffsetAlignment = static_cast(nativeLimits.minUniformBufferOffsetAlignment); + result.minStorageBufferOffsetAlignment = static_cast(nativeLimits.minStorageBufferOffsetAlignment); + result.optimalBufferCopyOffsetAlignment = static_cast(nativeLimits.optimalBufferCopyOffsetAlignment); + result.optimalBufferCopyRowPitchAlignment = static_cast(nativeLimits.optimalBufferCopyRowPitchAlignment); + result.maxComputeWorkgroupSizeX = nativeLimits.maxComputeWorkGroupSize[0]; + result.maxComputeWorkgroupSizeY = nativeLimits.maxComputeWorkGroupSize[1]; + result.maxComputeWorkgroupSizeZ = nativeLimits.maxComputeWorkGroupSize[2]; + result.maxComputeInvocationsPerWorkgroup = nativeLimits.maxComputeWorkGroupInvocations; + result.maxComputeWorkgroupsPerDimension = 
std::min({ nativeLimits.maxComputeWorkGroupCount[0], nativeLimits.maxComputeWorkGroupCount[1], nativeLimits.maxComputeWorkGroupCount[2] }); // RHI exposes a single limit for every axis while Vulkan reports one per axis (Y/Z are often smaller than X), so take the minimum. + return result; + } + Common::UniquePtr VulkanGpu::RequestDevice(const DeviceCreateInfo& inCreateInfo) { return { new VulkanDevice(*this, inCreateInfo) }; diff --git a/Engine/Source/RHI/Include/RHI/CommandRecorder.h b/Engine/Source/RHI/Include/RHI/CommandRecorder.h index ac747439..3b9a4a28 100644 --- a/Engine/Source/RHI/Include/RHI/CommandRecorder.h +++ b/Engine/Source/RHI/Include/RHI/CommandRecorder.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -92,6 +93,21 @@ namespace RHI { BufferTextureCopyInfo& SetCopyRegion(const Common::UVec3& inCopyRegion); }; + struct DrawIndirectArguments { + uint32_t vertexCount = 0; + uint32_t instanceCount = 0; + uint32_t firstVertex = 0; + uint32_t firstInstance = 0; + }; + + struct DrawIndexedIndirectArguments { + uint32_t indexCount = 0; + uint32_t instanceCount = 0; + uint32_t firstIndex = 0; + int32_t baseVertex = 0; + uint32_t firstInstance = 0; + }; + template struct ColorAttachmentBase { LoadOp loadOp; @@ -184,6 +200,8 @@ namespace RHI { public: virtual ~CommonCommandRecorder(); virtual void ResourceBarrier(const Barrier& barrier) = 0; + virtual void BeginMarker(const std::string& label) = 0; + virtual void EndMarker() = 0; }; class CopyPassCommandRecorder : public CommonCommandRecorder { @@ -232,9 +250,10 @@ namespace RHI { virtual void SetPrimitiveTopology(PrimitiveTopology primitiveTopology) = 0; virtual void SetBlendConstant(const float*/*[4]*/ constants) = 0; virtual void SetStencilReference(uint32_t reference) = 0; - // TODO DrawIndirect(...) - // TODO DrawIndexedIndirect(...) - // TODO MultiIndirectDraw(...) 
+ virtual void DrawIndirect(Buffer* indirectBuffer, size_t offset) = 0; + virtual void DrawIndexedIndirect(Buffer* indirectBuffer, size_t offset) = 0; + virtual void MultiDrawIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) = 0; + virtual void MultiDrawIndexedIndirect(Buffer* indirectBuffer, size_t offset, size_t drawCount) = 0; virtual void EndPass() = 0; protected: @@ -254,8 +273,27 @@ namespace RHI { protected: CommandRecorder(); }; + + class ScopedMarker { + public: + NonCopyable(ScopedMarker) + ScopedMarker(CommonCommandRecorder& inRecorder, const std::string& inLabel); + ~ScopedMarker(); + + private: + CommonCommandRecorder& recorder; + }; } +#define RHI_MARKER_CONCAT_IMPL(a, b) a##b +#define RHI_MARKER_CONCAT(a, b) RHI_MARKER_CONCAT_IMPL(a, b) +#if BUILD_CONFIG_DEBUG +#define RHI_SCOPED_MARKER(recorder, label) \ + RHI::ScopedMarker RHI_MARKER_CONCAT(rhiScopedMarker_, __COUNTER__) { recorder, label } +#else +#define RHI_SCOPED_MARKER(recorder, label) ((void) 0) +#endif + namespace RHI { template ColorAttachmentBase::ColorAttachmentBase( diff --git a/Engine/Source/RHI/Include/RHI/Common.h b/Engine/Source/RHI/Include/RHI/Common.h index 594174c0..ea4c46cf 100644 --- a/Engine/Source/RHI/Include/RHI/Common.h +++ b/Engine/Source/RHI/Include/RHI/Common.h @@ -392,6 +392,7 @@ namespace RHI { shaderReadOnly, storage, rwStorage, + indirect, max }; @@ -462,6 +463,17 @@ namespace RHI { }; using ColorWriteFlags = Common::Flags; DECLARE_FLAG_BITS_OP(ColorWriteFlags, ColorWriteBits) + + enum class FeatureBits : uint32_t { + samplerAnisotropy = 0x1, + textureCompressionBc = 0x2, + timestampQuery = 0x4, + multiDrawIndirect = 0x8, + drawIndirectFirstInstance = 0x10, + max = 0x20 + }; + using FeatureFlags = Common::Flags; + DECLARE_FLAG_BITS_OP(FeatureFlags, FeatureBits) } namespace RHI { diff --git a/Engine/Source/RHI/Include/RHI/Gpu.h b/Engine/Source/RHI/Include/RHI/Gpu.h index decf5b7d..981e8655 100644 --- a/Engine/Source/RHI/Include/RHI/Gpu.h +++ 
b/Engine/Source/RHI/Include/RHI/Gpu.h @@ -20,11 +20,36 @@ namespace RHI { GpuType type; }; + struct GpuLimits { + uint32_t maxTextureDimension1D; + uint32_t maxTextureDimension2D; + uint32_t maxTextureDimension3D; + uint32_t maxTextureArrayLayers; + uint32_t maxBindGroups; + uint32_t maxVertexBuffers; + uint32_t maxVertexAttributes; + uint32_t maxColorAttachments; + uint64_t maxUniformBufferBindingSize; + uint64_t maxStorageBufferBindingSize; + uint64_t maxBufferSize; + uint32_t minUniformBufferOffsetAlignment; + uint32_t minStorageBufferOffsetAlignment; + uint32_t optimalBufferCopyOffsetAlignment; + uint32_t optimalBufferCopyRowPitchAlignment; + uint32_t maxComputeWorkgroupSizeX; + uint32_t maxComputeWorkgroupSizeY; + uint32_t maxComputeWorkgroupSizeZ; + uint32_t maxComputeInvocationsPerWorkgroup; + uint32_t maxComputeWorkgroupsPerDimension; + }; + class Gpu { public: NonCopyable(Gpu) virtual ~Gpu(); virtual GpuProperty GetProperty() = 0; + virtual FeatureFlags GetFeatures() = 0; + virtual GpuLimits GetLimits() = 0; virtual Common::UniquePtr RequestDevice(const DeviceCreateInfo& createInfo) = 0; virtual Instance& GetInstance() const = 0; diff --git a/Engine/Source/RHI/Src/CommandRecorder.cpp b/Engine/Source/RHI/Src/CommandRecorder.cpp index d553f51b..c3f67420 100644 --- a/Engine/Source/RHI/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI/Src/CommandRecorder.cpp @@ -207,4 +207,15 @@ namespace RHI { RasterPassCommandRecorder::RasterPassCommandRecorder() = default; RasterPassCommandRecorder::~RasterPassCommandRecorder() = default; + + ScopedMarker::ScopedMarker(CommonCommandRecorder& inRecorder, const std::string& inLabel) + : recorder(inRecorder) + { + recorder.BeginMarker(inLabel); + } + + ScopedMarker::~ScopedMarker() + { + recorder.EndMarker(); + } } diff --git a/Engine/Source/Render/Src/RenderGraph.cpp b/Engine/Source/Render/Src/RenderGraph.cpp index c119bac8..247777cf 100644 --- a/Engine/Source/Render/Src/RenderGraph.cpp +++ 
b/Engine/Source/Render/Src/RenderGraph.cpp @@ -783,6 +783,7 @@ namespace Render { void RGBuilder::ExecuteCopyPass(RHI::CommandRecorder& inRecoder, RGCopyPass* inCopyPass) { + RHI_SCOPED_MARKER(inRecoder, inCopyPass->name); DevirtualizeResources(passWritesMap.at(inCopyPass)); { TransitionResourcesForCopyPassDesc(inRecoder, inCopyPass->passDesc); @@ -803,6 +804,7 @@ namespace Render { void RGBuilder::ExecuteComputePass(RHI::CommandRecorder& inRecoder, RGComputePass* inComputePass) { + RHI_SCOPED_MARKER(inRecoder, inComputePass->name); DevirtualizeResources(passWritesMap.at(inComputePass)); DevirtualizeBindGroupsAndViews(inComputePass->bindGroups); { @@ -825,6 +827,7 @@ namespace Render { void RGBuilder::ExecuteRasterPass(RHI::CommandRecorder& inRecoder, RGRasterPass* inRasterPass) { + RHI_SCOPED_MARKER(inRecoder, inRasterPass->name); DevirtualizeResources(passWritesMap.at(inRasterPass)); DevirtualizeAttachmentViews(inRasterPass->passDesc); DevirtualizeBindGroupsAndViews(inRasterPass->bindGroups); diff --git a/Engine/Source/Runtime/Src/Asset/Texture.cpp b/Engine/Source/Runtime/Src/Asset/Texture.cpp index d3cd7152..449a2be1 100644 --- a/Engine/Source/Runtime/Src/Asset/Texture.cpp +++ b/Engine/Source/Runtime/Src/Asset/Texture.cpp @@ -184,6 +184,7 @@ namespace Runtime { void Texture::UpdateMips() { const auto arraySize = type == TextureType::t3D ? 1 : depthOrArraySize; + const auto depth = type == TextureType::t3D ? 
depthOrArraySize : 1; const auto bytesPerPixel = RHI::GetBytesPerPixel(static_cast(format)); subResourcePixelsData.clear(); @@ -192,9 +193,10 @@ namespace Runtime { for (auto m = 0; m < mipLevels; m++) { const auto mipWidth = std::max(width >> m, 1u); const auto mipHeight = std::max(height >> m, 1u); + const auto mipDepth = std::max(depth >> m, 1u); for (auto a = 0; a < arraySize; a++) { - subResourcePixelsData[Internal::GetSubResourceIndex(m, a, arraySize)].resize(mipWidth * mipHeight * bytesPerPixel); + subResourcePixelsData[Internal::GetSubResourceIndex(m, a, arraySize)].resize(mipWidth * mipHeight * mipDepth * bytesPerPixel); } } } @@ -230,8 +232,6 @@ namespace Runtime { texturePtr = texture.Get(), type = type, format = format, - width = width, - height = height, depthOrArraySize = depthOrArraySize, mipLevels = mipLevels, aspect = Internal::GetTextureAspect(format), @@ -239,7 +239,6 @@ namespace Runtime { name = name ]() -> void { const auto arraySize = type == TextureType::t3D ? 1 : depthOrArraySize; - const auto depth = type == TextureType::t3D ? 
depthOrArraySize : 1; std::vector copyFootprints; copyFootprints.reserve(mipLevels * arraySize); @@ -261,8 +260,7 @@ namespace Runtime { .SetInitialState(RHI::BufferState::staging) .SetDebugName(std::format("StagingBuffer-{}", name))); - const auto srcRowPitch = width * RHI::GetBytesPerPixel(static_cast(format)); - const auto srcSlicePitch = width * height * RHI::GetBytesPerPixel(static_cast(format)); + const auto bytesPerPixel = RHI::GetBytesPerPixel(static_cast(format)); size_t dstSubResourceOffset = 0; auto* dstData = static_cast(stagingBuffer->Map(RHI::MapMode::write, 0, totalBytes)); @@ -272,8 +270,10 @@ namespace Runtime { const auto& srcPixels = subResourcePixelsData[subResourceIndex]; const auto& dstCopyFootprint = copyFootprints[subResourceIndex]; - for (auto z = 0; z < depthOrArraySize; z++) { - for (auto y = 0; y < height; y++) { + const auto srcRowPitch = dstCopyFootprint.extent.x * bytesPerPixel; + const auto srcSlicePitch = srcRowPitch * dstCopyFootprint.extent.y; + for (auto z = 0u; z < dstCopyFootprint.extent.z; z++) { + for (auto y = 0u; y < dstCopyFootprint.extent.y; y++) { const auto* src = srcPixels.data() + srcSlicePitch * z + srcRowPitch * y; auto* dst = dstData + dstSubResourceOffset + dstCopyFootprint.slicePitch * z + dstCopyFootprint.rowPitch * y; memcpy(dst, src, srcRowPitch); @@ -300,7 +300,7 @@ namespace Runtime { .SetBufferOffset(dstSubResourceOffset) .SetTextureSubResource(RHI::TextureSubResourceInfo(m, a, aspect)) .SetTextureOrigin({ 0, 0, 0 }) - .SetCopyRegion({ width, height, depth })); + .SetCopyRegion(copyFootprints[subResourceIndex].extent)); dstSubResourceOffset += copyFootprints[subResourceIndex].totalBytes; } } diff --git a/ThirdParty/ConanRecipes/dxc/conanfile.py b/ThirdParty/ConanRecipes/dxc/conanfile.py index 0f4cf8e0..98b2a1db 100644 --- a/ThirdParty/ConanRecipes/dxc/conanfile.py +++ b/ThirdParty/ConanRecipes/dxc/conanfile.py @@ -38,6 +38,12 @@ def source(self): def generate(self): cmake_toolchain = CMakeToolchain(self, 
generator="Ninja") + # DXC's StringRef.h specializes std::is_nothrow_constructible (an upstream "HLSL Change"). Newer Clang flags + # specializing a standard library entity as -Winvalid-specialization, which is an error by default, so recent + # Apple Clang fails to build the pinned source. Suppress the diagnostic (the specialization still takes effect, + # preserving upstream behavior); -Wno-unknown-warning-option keeps older Clang that lacks the flag happy. + if str(self.settings.compiler) in ("clang", "apple-clang"): + cmake_toolchain.extra_cxxflags.extend(["-Wno-unknown-warning-option", "-Wno-invalid-specialization"]) cmake_toolchain.generate() deps = CMakeDeps(self)