diff --git a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp index e7c5490123..babf5b7953 100644 --- a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp +++ b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp @@ -77,6 +77,7 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass { private: void AnnotateValues(llvm::Instruction *pI); void AnnotateStore(llvm::Instruction *pI); + void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI); bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx); void AnnotateAlloca(llvm::AllocaInst *pAlloca); @@ -133,6 +134,15 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { auto instrumentableFunctions = PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM); + for (auto *F : instrumentableFunctions) { + for (auto &block : F->getBasicBlockList()) { + for (auto it = block.begin(); it != block.end();) { + llvm::Instruction *I = &*(it++); + SplitVectorStores(m_DM->GetOP(), I); + } + } + } + for (auto *F : instrumentableFunctions) { for (auto &block : F->getBasicBlockList()) { for (llvm::Instruction &I : block.getInstList()) { @@ -297,15 +307,37 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( return false; } // And of course the member we're after might not be at the beginning of - // the struct: - auto *pStructType = llvm::dyn_cast( - pPointerGEP->getPointerOperandType()->getPointerElementType()); - auto *pStructMember = - llvm::dyn_cast(pPointerGEP->getOperand(2)); - uint64_t memberIndex = pStructMember->getLimitedValue(); - for (uint64_t i = 0; i < memberIndex; ++i) { - precedingMemberCount += - CountStructMembers(pStructType->getStructElementType(i)); + // any containing struct: + if (auto *pStructType = llvm::dyn_cast( + pPointerGEP->getPointerOperandType() + ->getPointerElementType())) { + auto *pStructMember = + llvm::dyn_cast(pPointerGEP->getOperand(2)); + uint64_t memberIndex = pStructMember->getLimitedValue(); + for (uint64_t i = 0; i < memberIndex; ++i) { + precedingMemberCount += + CountStructMembers(pStructType->getStructElementType(i)); + } + } + + // And the source pointer may be a vector (floatn) type, + // and if so, that's another offset to consider. + llvm::Type *DestType = pGEP->getPointerOperand()->getType(); + // We expect this to be a pointer type (it's a GEP after all): + if (DestType->isPointerTy()) { + llvm::Type *PointedType = DestType->getPointerElementType(); + // Being careful to check num operands too in order to avoid false + // positives: + if (PointedType->isVectorTy() && pGEP->getNumOperands() == 3) { + // Fetch the second deref (in operand 2). + // (the first derefs the pointer to the "floatn", + // and the second denotes the index into the floatn.) + llvm::Value *vectorIndex = pGEP->getOperand(2); + if (auto *constIntIIndex = + llvm::cast(vectorIndex)) { + precedingMemberCount += constIntIIndex->getLimitedValue(); + } + } } } else { return false; @@ -365,6 +397,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateAlloca( AssignNewAllocaRegister(pAlloca, 1); } else if (auto *AT = llvm::dyn_cast(pAllocaTy)) { AssignNewAllocaRegister(pAlloca, AT->getNumElements()); + } else if (auto *VT = llvm::dyn_cast(pAllocaTy)) { + AssignNewAllocaRegister(pAlloca, VT->getNumElements()); } else if (auto *ST = llvm::dyn_cast(pAllocaTy)) { AssignNewAllocaRegister(pAlloca, CountStructMembers(ST)); } else { @@ -433,6 +467,36 @@ void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister( m_uVReg += C; } +void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP, + llvm::Instruction *pI) { + auto *pSt = llvm::dyn_cast(pI); + if (pSt == nullptr) { + return; + } + + llvm::AllocaInst *Alloca; + llvm::Value *Index; + if (!IsAllocaRegisterWrite(pSt->getPointerOperand(), &Alloca, &Index)) { + return; + } + + llvm::Type *SourceType = pSt->getValueOperand()->getType(); + if (SourceType->isVectorTy()) { + if (auto *constIntIIndex = llvm::cast(Index)) { + // break vector alloca stores up into individual stores + llvm::IRBuilder<> B(pSt); + for (uint64_t el = 0; el < SourceType->getVectorNumElements(); ++el) { + llvm::Value *destPointer = B.CreateGEP(pSt->getPointerOperand(), + {B.getInt32(0), B.getInt32(el)}); + llvm::Value *source = + B.CreateExtractElement(pSt->getValueOperand(), el); + B.CreateStore(source, destPointer); + } + pI->eraseFromParent(); + } + } +} + } // namespace using namespace llvm; diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 03ab62c50b..4f4cc7c620 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -1052,6 +1052,9 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { // Done with these guys: m_GEPOperandAsInstructionDestroyers.clear(); + if (OSOverride != nullptr && !Modified) { + *OSOverride << "\nNotModified\n"; + } return Modified; } char DxilShaderAccessTracking::ID = 0; diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index df964bf433..bb81c1c953 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -14,6 +14,7 @@ #endif #include +#include #include #include #include @@ -112,6 +113,11 @@ class PixTest : public ::testing::Test { TEST_METHOD(SignatureModification_VertexIdAlready) TEST_METHOD(SignatureModification_SomethingElseFirst) + TEST_METHOD(AccessTracking_ModificationReport_Nothing) + TEST_METHOD(AccessTracking_ModificationReport_Read) + TEST_METHOD(AccessTracking_ModificationReport_Write) + TEST_METHOD(AccessTracking_ModificationReport_SM66) + TEST_METHOD(PixStructAnnotation_Lib_DualRaygen) TEST_METHOD(PixStructAnnotation_Lib_RaygenAllocaStructAlignment) @@ -144,6 +150,8 @@ class PixTest : public ::testing::Test { TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) + TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -348,6 +356,8 @@ class PixTest : public ::testing::Test { *ppNewShaderOut = pNewContainer.Detach(); } + void ValidateAccessTrackingMods(const char *hlsl, bool modsExpected); + class ModuleAndHangersOn { std::unique_ptr llvmContext; std::unique_ptr llvmModule; @@ -429,12 +439,13 @@ class PixTest : public ::testing::Test { const wchar_t *profile = L"as_6_5"); void ValidateAllocaWrite(std::vector const &allocaWrites, size_t index, const char *name); - CComPtr RunShaderAccessTrackingPass(IDxcBlob *blob); + PassOutput RunShaderAccessTrackingPass(IDxcBlob *blob); std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); + std::string Disassemble(IDxcBlob *pProgram); }; bool PixTest::InitSupport() { @@ -576,13 +587,14 @@ TEST_F(PixTest, CompileDebugDisasmPDB) { VERIFY_SUCCEEDED(pCompiler->Disassemble(pPdbBlob, &pDisasm)); } -CComPtr PixTest::RunShaderAccessTrackingPass(IDxcBlob *blob) { +PassOutput PixTest::RunShaderAccessTrackingPass(IDxcBlob *blob) { CComPtr pOptimizer; VERIFY_SUCCEEDED( m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); std::vector Options; Options.push_back(L"-opt-mod-passes"); - Options.push_back(L"-hlsl-dxil-pix-shader-access-instrumentation,config="); + Options.push_back(L"-hlsl-dxil-pix-shader-access-instrumentation,config=U0:0:" + L"10i0;U0:1:2i0;.0;0;0."); CComPtr pOptimizedModule; CComPtr pText; @@ -604,7 +616,12 @@ CComPtr PixTest::RunShaderAccessTrackingPass(IDxcBlob *blob) { CComPtr pNewContainer; VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pNewContainer)); - return pNewContainer; + PassOutput ret; + ret.blob = pNewContainer; + std::string outputText = BlobToUtf8(pText); + ret.lines = Tokenize(outputText.c_str(), "\n"); + + return ret; } CComPtr PixTest::RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob) { @@ -816,6 +833,61 @@ TEST_F(PixTest, SignatureModification_SomethingElseFirst) { VERIFY_ARE_EQUAL(sig.GetElement(2).GetStartRow(), 2); } +void PixTest::ValidateAccessTrackingMods(const char *hlsl, bool modsExpected) { + auto code = Compile(m_dllSupport, hlsl, L"ps_6_6", {L"-Od"}, L"main"); + auto result = RunShaderAccessTrackingPass(code).lines; + bool hasMods = true; + for (auto const &line : result) + if (line.find("NotModified") != std::string::npos) + hasMods = false; + VERIFY_ARE_EQUAL(modsExpected, hasMods); +} + +TEST_F(PixTest, AccessTracking_ModificationReport_Nothing) { + const char *hlsl = R"( +float main() : SV_Target +{ + return 0; +} +)"; + ValidateAccessTrackingMods(hlsl, false); +} + +TEST_F(PixTest, AccessTracking_ModificationReport_Read) { + const char *hlsl = R"( +RWByteAddressBuffer g_texture; +float main() : SV_Target +{ + return g_texture.Load(0); +} +)"; + ValidateAccessTrackingMods(hlsl, true); +} + +TEST_F(PixTest, AccessTracking_ModificationReport_Write) { + const char *hlsl = R"( +RWByteAddressBuffer g_texture; +float main() : SV_Target +{ + g_texture.Store(0, 0); + return 0; +} +)"; + ValidateAccessTrackingMods(hlsl, true); +} + +TEST_F(PixTest, AccessTracking_ModificationReport_SM66) { + const char *hlsl = R"( +float main() : SV_Target +{ + RWByteAddressBuffer g_texture = ResourceDescriptorHeap[0]; + g_texture.Store(0, 0); + return 0; +} +)"; + ValidateAccessTrackingMods(hlsl, true); +} + TEST_F(PixTest, AddToASGroupSharedPayload) { const char *hlsl = R"( @@ -1083,6 +1155,16 @@ static bool FindStructMemberFromStore(llvm::StoreInst *S, return false; } +std::string PixTest::Disassemble(IDxcBlob *pProgram) { + CComPtr pCompiler; + CComPtr pResult; + CComPtr pSource; + VERIFY_SUCCEEDED(CreateCompiler(m_dllSupport, &pCompiler)); + CComPtr pDisassembly; + VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgram, &pDisassembly)); + return BlobToUtf8(pDisassembly); +} + // This function lives in lib\DxilPIXPasses\DxilAnnotateWithVirtualRegister.cpp // Declared here so we can test it. uint32_t CountStructMembers(llvm::Type const *pType); @@ -2720,7 +2802,7 @@ void MyMiss(inout MyPayload payload) CComPtr compiled; VERIFY_SUCCEEDED(pResult->GetResult(&compiled)); - auto optimizedContainer = RunShaderAccessTrackingPass(compiled); + auto optimizedContainer = RunShaderAccessTrackingPass(compiled).blob; const char *pBlobContent = reinterpret_cast(optimizedContainer->GetBufferPointer()); @@ -2790,7 +2872,7 @@ float4 main(int i : A, float j : B) : SV_TARGET )x"; auto compiled = Compile(m_dllSupport, dynamicTextureAccess, L"ps_6_6"); - auto pOptimizedContainer = RunShaderAccessTrackingPass(compiled); + auto pOptimizedContainer = RunShaderAccessTrackingPass(compiled).blob; const char *pBlobContent = reinterpret_cast(pOptimizedContainer->GetBufferPointer()); @@ -2955,3 +3037,172 @@ float4 main() : SV_Target { VERIFY_IS_TRUE(foundDoubleAssignment); VERIFY_IS_TRUE(found32BitAllocaStore); } + +std::string ExtractBracedSubstring(std::string const &line) { + auto open = line.find('{'); + auto close = line.find('}'); + if (open != std::string::npos && close != std::string::npos && + open + 1 < close) { + return line.substr(open + 1, close - open - 1); + } + return {}; +} + +int ExtractMetaInt32Value(std::string const &token) { + auto findi32 = token.find("i32 "); + if (findi32 != std::string_view::npos) { + return atoi( + std::string(token.data() + findi32 + 4, token.length() - (findi32 + 4)) + .c_str()); + } + return -1; +} + +std::vector Split(std::string str, char delimeter) { + std::vector lines; + + auto const *p = str.data(); + auto const *justPastPreviousDelimiter = p; + while (p < str.data() + str.length()) { + if (*p == delimeter) { + lines.emplace_back(std::string(justPastPreviousDelimiter, + p - justPastPreviousDelimiter)); + justPastPreviousDelimiter = p + 1; + p = justPastPreviousDelimiter; + } else { + p++; + } + } + + lines.emplace_back( + std::string(justPastPreviousDelimiter, p - justPastPreviousDelimiter)); + + return lines; +} + +struct MetadataAllocaDefinition { + int base; + int count; +}; +using AllocaDefinitions = std::map; +struct MetadataAllocaWrite { + int allocaDefMetadataKey; + int offset; + int size; +}; +using AllocaWrites = std::map; + +struct AllocaMetadata { + AllocaDefinitions allocaDefinitions; + AllocaWrites allocaWrites; + std::vector allocaWritesMetaKeys; +}; + +AllocaMetadata +FindAllocaRelatedMetadata(std::vector const &lines) { + + const char *allocaMetaDataAssignment = "= !{i32 1, "; + const char *allocaRegWRiteAssignment = "= !{i32 2, !"; + const char *allocaRegWriteTag = "!pix-alloca-reg-write !"; + + AllocaMetadata ret; + for (auto const &line : lines) { + if (line[0] == '!') { + auto key = atoi(std::string(line.data() + 1, line.length() - 1).c_str()); + if (key != -1) { + if (line.find(allocaMetaDataAssignment) != std::string::npos) { + std::string bitInBraces = ExtractBracedSubstring(line); + if (bitInBraces != "") { + auto tokens = Split(bitInBraces, ','); + if (tokens.size() == 3) { + auto value0 = ExtractMetaInt32Value(tokens[1]); + auto value1 = ExtractMetaInt32Value(tokens[2]); + if (value0 != -1 && value1 != -1) { + MetadataAllocaDefinition def; + def.base = value0; + def.count = value1; + ret.allocaDefinitions[key] = def; + } + } + } + } else if (line.find(allocaRegWRiteAssignment) != std::string::npos) { + std::string bitInBraces = ExtractBracedSubstring(line); + if (bitInBraces != "") { + auto tokens = Split(bitInBraces, ','); + if (tokens.size() == 4 && tokens[1][1] == '!') { + auto allocaKey = atoi(tokens[1].c_str() + 2); + auto value0 = ExtractMetaInt32Value(tokens[2]); + auto value1 = ExtractMetaInt32Value(tokens[3]); + if (value0 != -1 && value1 != -1) { + MetadataAllocaWrite aw; + aw.allocaDefMetadataKey = allocaKey; + aw.size = value0; + aw.offset = value1; + ret.allocaWrites[key] = aw; + } + } + } + } + } + } else { + auto findAw = line.find(allocaRegWriteTag); + if (findAw != std::string::npos) { + ret.allocaWritesMetaKeys.push_back( + atoi(line.c_str() + findAw + strlen(allocaRegWriteTag))); + } + } + } + return ret; +} + +TEST_F(PixTest, DebugInstrumentation_VectorAllocaWrite_Structs) { + const char *source = R"x( +RaytracingAccelerationStructure Scene : register(t0, space0); +struct RayPayload +{ + float4 color; +}; +RWStructuredBuffer UAV: register(u0); +[shader("raygeneration")] +void RaygenInternalName() +{ + RayDesc ray; + ray.Origin = float3(UAV[0], UAV[1],UAV[3]); + ray.Direction = float3(4.4,5.5,6.6); + ray.TMin = 0.001; + ray.TMax = 10000.0; + RayPayload payload = { float4(0, 1, 0, 1) }; + TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"lib_6_6", {L"-Od"}); + auto output = RunDebugPass(compiled); + auto disassembly = Disassemble(output.blob); + auto lines = Split(disassembly, '\n'); + auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines); + // To validate that the RayDesc and RayPayload instances were fully covered, + // check that there are alloca writes that cover all of them. RayPayload + // has four elements, and RayDesc has eight. + std::array RayPayloadElementCoverage; + std::array RayDescElementCoverage; + + for (auto const &write : metaDataKeyToValue.allocaWrites) { + // the whole point of the changes with this test is to separate vector + // writes into individual elements: + VERIFY_ARE_EQUAL(1, write.second.size); + auto findAlloca = metaDataKeyToValue.allocaDefinitions.find( + write.second.allocaDefMetadataKey); + if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) { + if (findAlloca->second.count == 4) { + RayPayloadElementCoverage[write.second.offset] = true; + } else if (findAlloca->second.count == 8) { + RayDescElementCoverage[write.second.offset] = true; + } + } + } + // Check that coverage for every element was emitted: + for (auto const &b : RayPayloadElementCoverage) + VERIFY_IS_TRUE(b); + for (auto const &b : RayDescElementCoverage) + VERIFY_IS_TRUE(b); +} diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index fad1a7e778..91b6c4479c 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -54,7 +54,7 @@ int ExtractMetaInt32Value(std::string const &token) { return -1; } std::map> -MetaDataKeyToRegisterNumber(std::vector const &lines) { +FindAllocaRelatedMetadata(std::vector const &lines) { // Find lines of the exemplary form // "!249 = !{i32 0, i32 20}" (in the case of a DXIL value) // "!196 = !{i32 1, i32 5, i32 1}" (in the case of a DXIL alloca reg) @@ -148,7 +148,7 @@ DxilRegisterToNameMap BuildDxilRegisterToNameMap(char const *disassembly) { auto lines = Tokenize(disassembly, "\n"); - auto metaDataKeyToValue = MetaDataKeyToRegisterNumber(lines); + auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines); for (auto const &line : lines) { if (line[0] == '!') {