Skip to content

Commit

Permalink
PIX: split vector alloca writes to enable debug instrumentation (#6990)
Browse files Browse the repository at this point in the history
Bit of background: PIX's debug passes add new allocas, stores to which
tie debug info to DXIL Values.
In the case of a preexisting alloca, PIX will still add its debug
writes, but the codegen may have emitted vector-valued stores.
Concretely, this happens for the ray payload and RayDesc structs in DXR.
Those vector-values stores were being treated incorrectly, resulting in
missing values in the PIX shader debugger.

This change splits vector-valued stores to such allocas into
extractelement/scalar-stores, which enables the rest of the PIX
instrumentation to function as expected.

(Worth noting that this change only applies to the PIX shader debugging
pass.)
  • Loading branch information
jeffnn authored Dec 4, 2024
1 parent eb1223e commit a03a77f
Show file tree
Hide file tree
Showing 3 changed files with 258 additions and 11 deletions.
82 changes: 73 additions & 9 deletions lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass {
private:
void AnnotateValues(llvm::Instruction *pI);
void AnnotateStore(llvm::Instruction *pI);
void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI);
bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI,
llvm::Value **pIdx);
void AnnotateAlloca(llvm::AllocaInst *pAlloca);
Expand Down Expand Up @@ -133,6 +134,15 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
auto instrumentableFunctions =
PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM);

for (auto *F : instrumentableFunctions) {
for (auto &block : F->getBasicBlockList()) {
for (auto it = block.begin(); it != block.end();) {
llvm::Instruction *I = &*(it++);
SplitVectorStores(m_DM->GetOP(), I);
}
}
}

for (auto *F : instrumentableFunctions) {
for (auto &block : F->getBasicBlockList()) {
for (llvm::Instruction &I : block.getInstList()) {
Expand Down Expand Up @@ -297,15 +307,37 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
return false;
}
// And of course the member we're after might not be at the beginning of
// the struct:
auto *pStructType = llvm::dyn_cast<llvm::StructType>(
pPointerGEP->getPointerOperandType()->getPointerElementType());
auto *pStructMember =
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
uint64_t memberIndex = pStructMember->getLimitedValue();
for (uint64_t i = 0; i < memberIndex; ++i) {
precedingMemberCount +=
CountStructMembers(pStructType->getStructElementType(i));
// any containing struct:
if (auto *pStructType = llvm::dyn_cast<llvm::StructType>(
pPointerGEP->getPointerOperandType()
->getPointerElementType())) {
auto *pStructMember =
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
uint64_t memberIndex = pStructMember->getLimitedValue();
for (uint64_t i = 0; i < memberIndex; ++i) {
precedingMemberCount +=
CountStructMembers(pStructType->getStructElementType(i));
}
}

// And the source pointer may be a vector (floatn) type,
// and if so, that's another offset to consider.
llvm::Type *DestType = pGEP->getPointerOperand()->getType();
// We expect this to be a pointer type (it's a GEP after all):
if (DestType->isPointerTy()) {
llvm::Type *PointedType = DestType->getPointerElementType();
// Being careful to check num operands too in order to avoid false
// positives:
if (PointedType->isVectorTy() && pGEP->getNumOperands() == 3) {
// Fetch the second deref (in operand 2).
// (the first derefs the pointer to the "floatn",
// and the second denotes the index into the floatn.)
llvm::Value *vectorIndex = pGEP->getOperand(2);
if (auto *constIntIIndex =
llvm::cast<llvm::ConstantInt>(vectorIndex)) {
precedingMemberCount += constIntIIndex->getLimitedValue();
}
}
}
} else {
return false;
Expand Down Expand Up @@ -365,6 +397,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateAlloca(
AssignNewAllocaRegister(pAlloca, 1);
} else if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pAllocaTy)) {
AssignNewAllocaRegister(pAlloca, AT->getNumElements());
} else if (auto *VT = llvm::dyn_cast<llvm::VectorType>(pAllocaTy)) {
AssignNewAllocaRegister(pAlloca, VT->getNumElements());
} else if (auto *ST = llvm::dyn_cast<llvm::StructType>(pAllocaTy)) {
AssignNewAllocaRegister(pAlloca, CountStructMembers(ST));
} else {
Expand Down Expand Up @@ -433,6 +467,36 @@ void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister(
m_uVReg += C;
}

void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP,
llvm::Instruction *pI) {
auto *pSt = llvm::dyn_cast<llvm::StoreInst>(pI);
if (pSt == nullptr) {
return;
}

llvm::AllocaInst *Alloca;
llvm::Value *Index;
if (!IsAllocaRegisterWrite(pSt->getPointerOperand(), &Alloca, &Index)) {
return;
}

llvm::Type *SourceType = pSt->getValueOperand()->getType();
if (SourceType->isVectorTy()) {
if (auto *constIntIIndex = llvm::cast<llvm::ConstantInt>(Index)) {
// break vector alloca stores up into individual stores
llvm::IRBuilder<> B(pSt);
for (uint64_t el = 0; el < SourceType->getVectorNumElements(); ++el) {
llvm::Value *destPointer = B.CreateGEP(pSt->getPointerOperand(),
{B.getInt32(0), B.getInt32(el)});
llvm::Value *source =
B.CreateExtractElement(pSt->getValueOperand(), el);
B.CreateStore(source, destPointer);
}
pI->eraseFromParent();
}
}
}

} // namespace

using namespace llvm;
Expand Down
183 changes: 183 additions & 0 deletions tools/clang/unittests/HLSL/PixTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#endif

#include <algorithm>
#include <array>
#include <cassert>
#include <cfloat>
#include <map>
Expand Down Expand Up @@ -149,6 +150,8 @@ class PixTest : public ::testing::Test {
TEST_METHOD(DebugInstrumentation_TextOutput)
TEST_METHOD(DebugInstrumentation_BlockReport)

TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs)

dxc::DxcDllSupport m_dllSupport;
VersionSupportInfo m_ver;

Expand Down Expand Up @@ -442,6 +445,7 @@ class PixTest : public ::testing::Test {
CComPtr<IDxcBlob> RunDxilPIXDXRInvocationsLog(IDxcBlob *blob);
void TestPixUAVCase(char const *hlsl, wchar_t const *model,
wchar_t const *entry);
std::string Disassemble(IDxcBlob *pProgram);
};

bool PixTest::InitSupport() {
Expand Down Expand Up @@ -1151,6 +1155,16 @@ static bool FindStructMemberFromStore(llvm::StoreInst *S,
return false;
}

std::string PixTest::Disassemble(IDxcBlob *pProgram) {
CComPtr<IDxcCompiler> pCompiler;
CComPtr<IDxcOperationResult> pResult;
CComPtr<IDxcBlobEncoding> pSource;
VERIFY_SUCCEEDED(CreateCompiler(m_dllSupport, &pCompiler));
CComPtr<IDxcBlobEncoding> pDisassembly;
VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgram, &pDisassembly));
return BlobToUtf8(pDisassembly);
}

// This function lives in lib\DxilPIXPasses\DxilAnnotateWithVirtualRegister.cpp
// Declared here so we can test it.
uint32_t CountStructMembers(llvm::Type const *pType);
Expand Down Expand Up @@ -3023,3 +3037,172 @@ float4 main() : SV_Target {
VERIFY_IS_TRUE(foundDoubleAssignment);
VERIFY_IS_TRUE(found32BitAllocaStore);
}

std::string ExtractBracedSubstring(std::string const &line) {
auto open = line.find('{');
auto close = line.find('}');
if (open != std::string::npos && close != std::string::npos &&
open + 1 < close) {
return line.substr(open + 1, close - open - 1);
}
return {};
}

int ExtractMetaInt32Value(std::string const &token) {
auto findi32 = token.find("i32 ");
if (findi32 != std::string_view::npos) {
return atoi(
std::string(token.data() + findi32 + 4, token.length() - (findi32 + 4))
.c_str());
}
return -1;
}

std::vector<std::string> Split(std::string str, char delimeter) {
std::vector<std::string> lines;

auto const *p = str.data();
auto const *justPastPreviousDelimiter = p;
while (p < str.data() + str.length()) {
if (*p == delimeter) {
lines.emplace_back(std::string(justPastPreviousDelimiter,
p - justPastPreviousDelimiter));
justPastPreviousDelimiter = p + 1;
p = justPastPreviousDelimiter;
} else {
p++;
}
}

lines.emplace_back(
std::string(justPastPreviousDelimiter, p - justPastPreviousDelimiter));

return lines;
}

struct MetadataAllocaDefinition {
int base;
int count;
};
using AllocaDefinitions = std::map<int, MetadataAllocaDefinition>;
struct MetadataAllocaWrite {
int allocaDefMetadataKey;
int offset;
int size;
};
using AllocaWrites = std::map<int, MetadataAllocaWrite>;

struct AllocaMetadata {
AllocaDefinitions allocaDefinitions;
AllocaWrites allocaWrites;
std::vector<int> allocaWritesMetaKeys;
};

AllocaMetadata
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {

const char *allocaMetaDataAssignment = "= !{i32 1, ";
const char *allocaRegWRiteAssignment = "= !{i32 2, !";
const char *allocaRegWriteTag = "!pix-alloca-reg-write !";

AllocaMetadata ret;
for (auto const &line : lines) {
if (line[0] == '!') {
auto key = atoi(std::string(line.data() + 1, line.length() - 1).c_str());
if (key != -1) {
if (line.find(allocaMetaDataAssignment) != std::string::npos) {
std::string bitInBraces = ExtractBracedSubstring(line);
if (bitInBraces != "") {
auto tokens = Split(bitInBraces, ',');
if (tokens.size() == 3) {
auto value0 = ExtractMetaInt32Value(tokens[1]);
auto value1 = ExtractMetaInt32Value(tokens[2]);
if (value0 != -1 && value1 != -1) {
MetadataAllocaDefinition def;
def.base = value0;
def.count = value1;
ret.allocaDefinitions[key] = def;
}
}
}
} else if (line.find(allocaRegWRiteAssignment) != std::string::npos) {
std::string bitInBraces = ExtractBracedSubstring(line);
if (bitInBraces != "") {
auto tokens = Split(bitInBraces, ',');
if (tokens.size() == 4 && tokens[1][1] == '!') {
auto allocaKey = atoi(tokens[1].c_str() + 2);
auto value0 = ExtractMetaInt32Value(tokens[2]);
auto value1 = ExtractMetaInt32Value(tokens[3]);
if (value0 != -1 && value1 != -1) {
MetadataAllocaWrite aw;
aw.allocaDefMetadataKey = allocaKey;
aw.size = value0;
aw.offset = value1;
ret.allocaWrites[key] = aw;
}
}
}
}
}
} else {
auto findAw = line.find(allocaRegWriteTag);
if (findAw != std::string::npos) {
ret.allocaWritesMetaKeys.push_back(
atoi(line.c_str() + findAw + strlen(allocaRegWriteTag)));
}
}
}
return ret;
}

TEST_F(PixTest, DebugInstrumentation_VectorAllocaWrite_Structs) {
const char *source = R"x(
RaytracingAccelerationStructure Scene : register(t0, space0);
struct RayPayload
{
float4 color;
};
RWStructuredBuffer<float> UAV: register(u0);
[shader("raygeneration")]
void RaygenInternalName()
{
RayDesc ray;
ray.Origin = float3(UAV[0], UAV[1],UAV[3]);
ray.Direction = float3(4.4,5.5,6.6);
ray.TMin = 0.001;
ray.TMax = 10000.0;
RayPayload payload = { float4(0, 1, 0, 1) };
TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload);
})x";

auto compiled = Compile(m_dllSupport, source, L"lib_6_6", {L"-Od"});
auto output = RunDebugPass(compiled);
auto disassembly = Disassemble(output.blob);
auto lines = Split(disassembly, '\n');
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);
// To validate that the RayDesc and RayPayload instances were fully covered,
// check that there are alloca writes that cover all of them. RayPayload
// has four elements, and RayDesc has eight.
std::array<bool, 4> RayPayloadElementCoverage;
std::array<bool, 8> RayDescElementCoverage;

for (auto const &write : metaDataKeyToValue.allocaWrites) {
// the whole point of the changes with this test is to separate vector
// writes into individual elements:
VERIFY_ARE_EQUAL(1, write.second.size);
auto findAlloca = metaDataKeyToValue.allocaDefinitions.find(
write.second.allocaDefMetadataKey);
if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) {
if (findAlloca->second.count == 4) {
RayPayloadElementCoverage[write.second.offset] = true;
} else if (findAlloca->second.count == 8) {
RayDescElementCoverage[write.second.offset] = true;
}
}
}
// Check that coverage for every element was emitted:
for (auto const &b : RayPayloadElementCoverage)
VERIFY_IS_TRUE(b);
for (auto const &b : RayDescElementCoverage)
VERIFY_IS_TRUE(b);
}
4 changes: 2 additions & 2 deletions tools/clang/unittests/HLSL/PixTestUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int ExtractMetaInt32Value(std::string const &token) {
return -1;
}
std::map<int, std::pair<int, int>>
MetaDataKeyToRegisterNumber(std::vector<std::string> const &lines) {
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {
// Find lines of the exemplary form
// "!249 = !{i32 0, i32 20}" (in the case of a DXIL value)
// "!196 = !{i32 1, i32 5, i32 1}" (in the case of a DXIL alloca reg)
Expand Down Expand Up @@ -148,7 +148,7 @@ DxilRegisterToNameMap BuildDxilRegisterToNameMap(char const *disassembly) {

auto lines = Tokenize(disassembly, "\n");

auto metaDataKeyToValue = MetaDataKeyToRegisterNumber(lines);
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);

for (auto const &line : lines) {
if (line[0] == '!') {
Expand Down

0 comments on commit a03a77f

Please sign in to comment.