Skip to content

Commit e2c4ea5

Browse files
authored
Cherry-pick -ftime-report and -ftime-trace patches to Release branch (#4880)
* Enable -ftime-report flag (#4736) This just surfaces and plumbs throug the -ftime-report clang flag in dxc.exe. -ftime-report prints timing data for different parts of the compile to help identify and track down performance issues. When using the dxcompiler library interface the output for the time report is returned as a DXC_OUT_TIME_REPORT buffer. I've set up the -ftime-report tests to run against the DXC command line. On Windows this usees the hcttestcmds file, on Linux (and windows if enabled) this will test through a lit shell suite. * Unbuffer raw_string_stream (#4872) Double buffering a string stream writing in memory is silly. Upstream LLVM made this change a few years ago based on analysis that most string implementations are sane: llvm/llvm-project@65b1361 * Backport -ftime-trace from upstream llvm & clang (#4873) * Backport -ftime-trace from upstream llvm & clang This change back-ports the -ftime-trace feature implemented by Aras Pranckevičius (@aras-p) into DXC. Aras has a great blog post describing the motivaiton for this feature and how to use it (see: https://aras-p.info/blog/2019/01/16/time-trace-timeline-flame-chart-prof iler-for-Clang/). As shaders are getting more and more complex we need better timing data for ourselves as compiler writers and our users as HLSL writers to know how to improve compile times of complex shaders. This implementation is a starting point to give us data to help drive our efforts to improve compile-time performance. Because DXC's LLVM is very far removed from the version of LLVM the patch was originally written for this change is not strictly a cherry-pick of llvm/llvm-project@d880de2d19d46f7cfea7aa593602 458440bc1e8d. Instead this change is a manually applied and reconstructed patch with significant alterations along the way. The alaterations are performed by myself, but credit for the patch remains with @aras-p. Patch by Aras Pranckevičius (@aras-p).
1 parent 650cd7f commit e2c4ea5

34 files changed

Lines changed: 521 additions & 32 deletions

include/dxc/Support/HLSLOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ class DxcOpts {
206206
bool ForceZeroStoreLifetimes = false; // OPT_force_zero_store_lifetimes
207207
bool EnableLifetimeMarkers = false; // OPT_enable_lifetime_markers
208208
bool ForceDisableLocTracking = false; // OPT_fdisable_loc_tracking
209+
bool TimeReport = false; // OPT_ftime_report
210+
std::string TimeTrace = ""; // OPT_ftime_trace[EQ]
209211

210212
// Optimization pass enables, disables and selects
211213
std::map<std::string, bool> DxcOptimizationToggles; // OPT_opt_enable & OPT_opt_disable

include/dxc/Support/HLSLOptions.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,15 @@ def fdisable_loc_tracking : Flag<["-"], "fdisable-loc-tracking">,
172172
Group<hlslcomp_Group>, Flags<[CoreOption]>,
173173
HelpText<"Disable source location tracking in IR. This will break diagnostic generation for late validation. (Ignored if /Zi is passed)">;
174174

175+
176+
def ftime_report : Flag<["-"], "ftime-report">, Group<hlslcomp_Group>, Flags<[CoreOption]>, HelpText<"Print time report">;
177+
def ftime_trace : Flag<["-"], "ftime-trace">,
178+
Group<hlslcomp_Group>, Flags<[CoreOption]>,
179+
HelpText<"Print hierchial time tracing to stdout">;
180+
def ftime_trace_EQ : Joined<["-"], "ftime-trace=">,
181+
Group<hlslcomp_Group>, Flags<[CoreOption]>,
182+
HelpText<"Print hierchial time tracing to file">;
183+
175184
/*
176185
def fno_caret_diagnostics : Flag<["-"], "fno-caret-diagnostics">, Group<hlslcomp_Group>,
177186
Flags<[CoreOption]>;

include/dxc/Support/dxcapi.impl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,16 @@ inline DxcOutputType DxcGetOutputType(DXC_OUT_KIND kind) {
9797
case DXC_OUT_HLSL:
9898
case DXC_OUT_TEXT:
9999
case DXC_OUT_REMARKS:
100+
case DXC_OUT_TIME_REPORT:
101+
case DXC_OUT_TIME_TRACE:
100102
return DxcOutputType_Text;
101103
default:
102104
return DxcOutputType_None;
103105
}
104106
}
105107

106108
// Update when new results are allowed
107-
static const unsigned kNumDxcOutputTypes = DXC_OUT_REMARKS;
109+
static const unsigned kNumDxcOutputTypes = DXC_OUT_LAST;
108110
static const SIZE_T kAutoSize = (SIZE_T)-1;
109111
static const LPCWSTR DxcOutNoName = nullptr;
110112

include/dxc/dxcapi.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,11 +476,19 @@ typedef enum DXC_OUT_KIND {
476476
DXC_OUT_REFLECTION = 8, // IDxcBlob - RDAT part with reflection data
477477
DXC_OUT_ROOT_SIGNATURE = 9, // IDxcBlob - Serialized root signature output
478478
DXC_OUT_EXTRA_OUTPUTS = 10,// IDxcExtraResults - Extra outputs
479-
DXC_OUT_REMARKS = 11, // IDxcBlobUtf8 or IDxcBlobUtf16 - text directed at stdout
479+
DXC_OUT_REMARKS = 11, // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
480+
DXC_OUT_TIME_REPORT = 12, // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
481+
DXC_OUT_TIME_TRACE = 13, // IDxcBlobUtf8 or IDxcBlobWide - text directed at stdout
480482

483+
DXC_OUT_LAST = DXC_OUT_TIME_TRACE, // Last value for a counter
484+
485+
DXC_OUT_NUM_ENUMS,
481486
DXC_OUT_FORCE_DWORD = 0xFFFFFFFF
482487
} DXC_OUT_KIND;
483488

489+
static_assert(DXC_OUT_NUM_ENUMS == DXC_OUT_LAST + 1,
490+
"DXC_OUT_* Enum added and last value not updated.");
491+
484492
CROSS_PLATFORM_UUIDOF(IDxcResult, "58346CDA-DDE7-4497-9461-6F87AF5E0659")
485493
struct IDxcResult : public IDxcOperationResult {
486494
virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0;
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===- llvm/Support/TimeProfiler.h - Hierarchical Time Profiler -*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_SUPPORT_TIME_PROFILER_H
11+
#define LLVM_SUPPORT_TIME_PROFILER_H
12+
13+
#include "llvm/ADT/STLExtras.h"
14+
#include "llvm/Support/raw_ostream.h"
15+
16+
namespace llvm {
17+
18+
struct TimeTraceProfiler;
19+
extern TimeTraceProfiler *TimeTraceProfilerInstance;
20+
21+
/// Initialize the time trace profiler.
22+
/// This sets up the global \p TimeTraceProfilerInstance
23+
/// variable to be the profiler instance.
24+
void timeTraceProfilerInitialize();
25+
26+
/// Cleanup the time trace profiler, if it was initialized.
27+
void timeTraceProfilerCleanup();
28+
29+
/// Is the time trace profiler enabled, i.e. initialized?
30+
inline bool timeTraceProfilerEnabled() {
31+
return TimeTraceProfilerInstance != nullptr;
32+
}
33+
34+
/// Write profiling data to output file.
35+
/// Data produced is JSON, in Chrome "Trace Event" format, see
36+
/// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
37+
void timeTraceProfilerWrite(raw_ostream &OS);
38+
39+
/// Manually begin a time section, with the given \p Name and \p Detail.
40+
/// Profiler copies the string data, so the pointers can be given into
41+
/// temporaries. Time sections can be hierarchical; every Begin must have a
42+
/// matching End pair but they can nest.
43+
void timeTraceProfilerBegin(StringRef Name, StringRef Detail);
44+
void timeTraceProfilerBegin(StringRef Name,
45+
llvm::function_ref<std::string()> Detail);
46+
47+
/// Manually end the last time section.
48+
void timeTraceProfilerEnd();
49+
50+
/// The TimeTraceScope is a helper class to call the begin and end functions
51+
/// of the time trace profiler. When the object is constructed, it begins
52+
/// the section; and when it is destroyed, it stops it. If the time profiler
53+
/// is not initialized, the overhead is a single branch.
54+
struct TimeTraceScope {
55+
TimeTraceScope(StringRef Name, StringRef Detail) {
56+
if (TimeTraceProfilerInstance != nullptr)
57+
timeTraceProfilerBegin(Name, Detail);
58+
}
59+
TimeTraceScope(StringRef Name, llvm::function_ref<std::string()> Detail) {
60+
if (TimeTraceProfilerInstance != nullptr)
61+
timeTraceProfilerBegin(Name, Detail);
62+
}
63+
~TimeTraceScope() {
64+
if (TimeTraceProfilerInstance != nullptr)
65+
timeTraceProfilerEnd();
66+
}
67+
};
68+
69+
} // end namespace llvm
70+
71+
#endif

include/llvm/Support/raw_ostream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ class raw_string_ostream : public raw_ostream {
516516
/// currently in the buffer.
517517
uint64_t current_pos() const override { return OS.size(); }
518518
public:
519-
explicit raw_string_ostream(std::string &O) : OS(O) {}
519+
explicit raw_string_ostream(std::string &O) : OS(O) { SetUnbuffered(); }
520520
~raw_string_ostream() override;
521521

522522
/// Flushes the stream contents to the target string and returns the string's

lib/Analysis/IPA/CallGraphSCCPass.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Support/CommandLine.h"
2727
#include "llvm/Support/Debug.h"
2828
#include "llvm/Support/Timer.h"
29+
#include "llvm/Support/TimeProfiler.h" // HLSL Change
2930
#include "llvm/Support/raw_ostream.h"
3031
using namespace llvm;
3132

@@ -126,6 +127,12 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
126127
}
127128

128129
{
130+
// HLSL Change Begin - Support hierarchial time tracing.
131+
StringRef FnName = (*CurSCC.begin())->getFunction()
132+
? (*CurSCC.begin())->getFunction()->getName()
133+
: "Unnamed";
134+
TimeTraceScope FunctionScope("CGSCCPass-Function", FnName);
135+
// HLSL Change End - Support hierarchial time tracing.
129136
TimeRegion PassTimer(getPassTimer(CGSP));
130137
Changed = CGSP->runOnSCC(CurSCC);
131138
}
@@ -150,6 +157,9 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
150157
if (Function *F = CGN->getFunction()) {
151158
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
152159
{
160+
// HLSL Change Begin - Support hierarchial time tracing.
161+
TimeTraceScope FunctionScope("CGSCCPass-Function", F->getName());
162+
// HLSL Change End - Support hierarchial time tracing.
153163
TimeRegion PassTimer(getPassTimer(FPP));
154164
Changed |= FPP->runOnFunction(*F);
155165
}
@@ -398,6 +408,9 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
398408
for (unsigned PassNo = 0, e = getNumContainedPasses();
399409
PassNo != e; ++PassNo) {
400410
Pass *P = getContainedPass(PassNo);
411+
// HLSL Change Begin - Support hierarchial time tracing.
412+
TimeTraceScope PassScope("RunCallGraphSCCPass", P->getPassName());
413+
// HLSL Change End - Support hierarchial time tracing.
401414

402415
// If we're in -debug-pass=Executions mode, construct the SCC node list,
403416
// otherwise avoid constructing this string as it is expensive.

lib/DxcSupport/HLSLOptions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
781781
!Args.hasFlag(OPT_disable_lifetime_markers, OPT_INVALID, false);
782782
opts.ForceDisableLocTracking =
783783
Args.hasFlag(OPT_fdisable_loc_tracking, OPT_INVALID, false);
784+
opts.TimeReport = Args.hasFlag(OPT_ftime_report, OPT_INVALID, false);
785+
opts.TimeTrace = Args.hasFlag(OPT_ftime_trace, OPT_INVALID, false) ? "-" : "";
786+
if (Args.hasArg(OPT_ftime_trace_EQ))
787+
opts.TimeTrace = Args.getLastArgValue(OPT_ftime_trace_EQ);
784788
opts.EnablePayloadQualifiers = Args.hasFlag(OPT_enable_payload_qualifiers, OPT_INVALID,
785789
DXIL::CompareVersions(Major, Minor, 6, 7) >= 0);
786790

lib/IR/LegacyPassManager.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/Support/Mutex.h"
2525
#include "llvm/Support/TimeValue.h"
2626
#include "llvm/Support/Timer.h"
27+
#include "llvm/Support/TimeProfiler.h"
2728
#include "llvm/Support/raw_ostream.h"
2829
#include <algorithm>
2930
#include <map>
@@ -1321,7 +1322,7 @@ bool BBPassManager::runOnFunction(Function &F) {
13211322

13221323
bool Changed = doInitialization(F);
13231324

1324-
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
1325+
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
13251326
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
13261327
BasicBlockPass *BP = getContainedPass(Index);
13271328
bool LocalChanged = false;
@@ -1552,10 +1553,18 @@ bool FPPassManager::runOnFunction(Function &F) {
15521553
// Collect inherited analysis from Module level pass manager.
15531554
populateInheritedAnalysis(TPM->activeStack);
15541555

1556+
// HLSL Change Begin - Support hierarchial time tracing.
1557+
llvm::TimeTraceScope FunctionScope("OptFunction", F.getName());
1558+
// HLSL Change End
1559+
15551560
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
15561561
FunctionPass *FP = getContainedPass(Index);
15571562
bool LocalChanged = false;
15581563

1564+
// HLSL Change Begin - Support hierarchial time tracing.
1565+
llvm::TimeTraceScope PassScope("RunFunctionPass", FP->getPassName());
1566+
// HLSL Change End - Support hierarchial time tracing.
1567+
15591568
dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
15601569
dumpRequiredSet(FP);
15611570

@@ -1578,12 +1587,14 @@ bool FPPassManager::runOnFunction(Function &F) {
15781587
recordAvailableAnalysis(FP);
15791588
removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
15801589
}
1590+
15811591
return Changed;
15821592
}
15831593

15841594
bool FPPassManager::runOnModule(Module &M) {
15851595
bool Changed = false;
15861596

1597+
llvm::TimeTraceScope TimeScope("OptModule", M.getName());
15871598
for (Function &F : M)
15881599
Changed |= runOnFunction(F);
15891600

@@ -1616,6 +1627,8 @@ bool FPPassManager::doFinalization(Module &M) {
16161627
/// the module, and if so, return true.
16171628
bool
16181629
MPPassManager::runOnModule(Module &M) {
1630+
llvm::TimeTraceScope TimeScope("OptModule", M.getName());
1631+
16191632
bool Changed = false;
16201633

16211634
// Initialize on-the-fly passes
@@ -1632,6 +1645,8 @@ MPPassManager::runOnModule(Module &M) {
16321645
ModulePass *MP = getContainedPass(Index);
16331646
bool LocalChanged = false;
16341647

1648+
llvm::TimeTraceScope PassScope("RunModulePass", MP->getPassName());
1649+
16351650
dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
16361651
dumpRequiredSet(MP);
16371652

lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ add_llvm_library(LLVMSupport
131131
TargetRegistry.cpp
132132
ThreadLocal.cpp
133133
Threading.cpp
134+
TimeProfiler.cpp # HLSL Change - Support hierarchial time tracing.
134135
TimeValue.cpp
135136
Valgrind.cpp
136137
Watchdog.cpp

0 commit comments

Comments
 (0)