Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
BinPackParameters: false
ColumnLimit: 100
ForEachMacros:
- _ZE_FOREACH_SLOT
- DL_FOREACH
- DL_FOREACH_SAFE
- DL_FOREACH_SAFE2
- HASH_ITER
7 changes: 5 additions & 2 deletions backends/ze/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,12 @@ TRACE_COMMON = \
tests/interval_profiling_normal.thapi_text_pretty \
tests/interval_profiling_multithread.thapi_text_pretty \
tests/interval_profiling_API_call.thapi_text_pretty \
tests/interval_profiling_fast.thapi_text_pretty \
tests/interval_profiling_interleave_process.thapi_text_pretty \
tests/interval_profiling_ignore.thapi_text_pretty
tests/interval_profiling_ignore.thapi_text_pretty \
tests/interval_profiling_shared_event.thapi_text_pretty \
tests/interval_profiling_resubmit_event.thapi_text_pretty \
tests/interval_profiling_shared_event_resubmit.thapi_text_pretty \
tests/interval_profiling_shared_event_xphase.thapi_text_pretty

BTX_ZE_GENERATED_SOURCE_TEST = \
btx_source_ze_test/metababel/metababel.h \
Expand Down
63 changes: 50 additions & 13 deletions backends/ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,20 @@ static void hSignalEvent_rest_entry_callback(void *btx_handle,
hCommandList, name, ts, btx_event_t::OTHER, {}};
}

static void zeCommandListAppendSignalEvent_entry_callback(void *btx_handle,
void *usr_data,
int64_t ts,
const char *hostname,
int64_t vpid,
uint64_t vtid,
ze_command_list_handle_t hCommandList,
ze_event_handle_t hEvent) {
(void)hEvent;
auto *data = static_cast<data_t *>(usr_data);
data->threadToLastLaunchInfo[{hostname, vpid, vtid}] = {
hCommandList, "zeCommandListAppendSignalEvent", ts, btx_event_t::SIGNAL, {}};
}

/*
* _ _ _
* _ _ / _ ._ _ ._ _ _. ._ _| / \ _ _ |_ _ _ _|_ _
Expand All @@ -584,9 +598,11 @@ zeCommandQueueExecuteCommandLists_entry_callback(void *btx_handle,
const auto commandQueueDesc = data->commandQueueToDesc[{hostname, vpid, hCommandQueue}];
for (size_t i = 0; i < _phCommandLists_vals_length; i++) {
for (auto &hEvent : data->commandListToEvents[{hostname, vpid, phCommandLists_vals[i]}]) {
auto &h = data->eventToBtxDesct[{hostname, vpid, hEvent}];
std::get<ze_command_queue_desc_t>(h) = commandQueueDesc;
std::get<int64_t>(h) = ts;
auto &ring = data->eventToBtxDesct[{hostname, vpid, hEvent}];
for (auto &h : ring.entries) {
std::get<ze_command_queue_desc_t>(h) = commandQueueDesc;
std::get<int64_t>(h) = ts;
}
}
}
}
Expand Down Expand Up @@ -825,11 +841,16 @@ static void event_profiling_callback(void *btx_handle,
}

// If not IMM, commandQueueDesc will be overwritten later
data->eventToBtxDesct[{hostname, vpid, hEvent}] = {vtid, commandQueueDesc,
hCommandList, hCommandListIsImmediate,
hDevice, commandName,
ts_min, clockLttngDevice,
type, ptr};
// Push onto the per-event ring. If the cursor has advanced (we've
// already consumed at least one result for this event), the prior
// ring belongs to a finished build phase — clear and start fresh.
auto &ring = data->eventToBtxDesct[{hostname, vpid, hEvent}];
if (ring.cursor > 0) {
ring.entries.clear();
ring.cursor = 0;
}
ring.entries.push_back({vtid, commandQueueDesc, hCommandList, hCommandListIsImmediate, hDevice,
commandName, ts_min, clockLttngDevice, type, ptr});
// Prepare job for non IMM
if (!hCommandListIsImmediate)
data->commandListToEvents[{hostname, vpid, hCommandList}].insert(hEvent);
Expand Down Expand Up @@ -880,14 +901,17 @@ static void event_profiling_result_callback(void *btx_handle,

auto *data = static_cast<data_t *>(usr_data);

// TODO: Should we always find the eventToBtxDesct?
// We didn't find the partial payload, that mean we should ignore it
// Read the current ring slot for this event; advance the cursor;
// wrap to 0 on overflow. Resubmits re-cycle through the same ring.
const auto it_p = data->eventToBtxDesct.find({hostname, vpid, hEvent});
if (it_p == data->eventToBtxDesct.cend())
if (it_p == data->eventToBtxDesct.cend() || it_p->second.entries.empty())
return;
// We don't erase, may have one entry for multiple result
auto &ring = it_p->second;
if (ring.cursor >= ring.entries.size())
ring.cursor = 0;
const auto &[vtid_submission, commandQueueDesc, hCommandList, hCommandListIsImmediate, device,
commandName, lltngMin, clockLttngDevice, type, ptr] = it_p->second;
commandName, lltngMin, clockLttngDevice, type, ptr] = ring.entries[ring.cursor];
ring.cursor++;
std::string metadata = "";
{
std::stringstream ss_metadata;
Expand All @@ -901,6 +925,13 @@ static void event_profiling_result_callback(void *btx_handle,
if (!hCommandListIsImmediate)
data->commandListToEvents[{hostname, vpid, hCommandList}].erase(hEvent);

/* AppendSignalEvent is a host-side signal with no GPU work to time.
* We pushed a ring entry to keep state consistent (so a future
* profiling_results lookup doesn't walk a stale prior entry), but
* suppress the device-side tally emission here. */
if (type == btx_event_t::SIGNAL)
return;

if ((type == btx_event_t::TRAFFIC) && (status == ZE_RESULT_SUCCESS)) {
auto &[ts, size] = std::get<btx_additional_info_traffic_t>(ptr);
btx_push_message_lttng_traffic(btx_handle, hostname, vpid, vtid, ts, BACKEND_ZE,
Expand Down Expand Up @@ -1400,6 +1431,12 @@ void btx_register_usr_callbacks(void *btx_handle) {
REGISTER_ASSOCIATED_CALLBACK(eventMemory_without_hSignalEvent_exit);
REGISTER_ASSOCIATED_CALLBACK(hSignalEvent_rest_entry);

/* zeCommandListAppendSignalEvent doesn't match the hSignalEvent_* sets
* (payload is `hEvent`, not `hSignalEvent`), so it needs its own entry
* callback to keep threadToLastLaunchInfo from going stale. */
btx_register_callbacks_lttng_ust_ze_zeCommandListAppendSignalEvent_entry(
btx_handle, &zeCommandListAppendSignalEvent_entry_callback);

/* Remove Memory */
REGISTER_ASSOCIATED_CALLBACK(memFree_entry);
REGISTER_ASSOCIATED_CALLBACK(memFree_exit);
Expand Down
17 changes: 15 additions & 2 deletions backends/ze/btx_zeinterval_callbacks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ using btx_kernel_group_size_t = std::tuple<uint32_t, uint32_t, uint32_t>;
using btx_kernel_desct_t =
std::tuple<std::string /*ze_kernel_desc_t*/, ze_kernel_properties_t, btx_kernel_group_size_t>;

enum class btx_event_t { TRAFFIC, KERNEL, OTHER };
// Category attached to a launch / event descriptor.
enum class btx_event_t {
  // Result handling reads a btx_additional_info_traffic_t (ts, size) payload.
  TRAFFIC,
  // Presumably kernel launches carrying btx_additional_info_kernel_t metadata
  // -- TODO confirm against the kernel callbacks.
  KERNEL,
  // zeCommandListAppendSignalEvent. A ring entry is created so state stays
  // consistent, but it is filtered out of the device tally (no GPU work).
  SIGNAL,
  // Any other command.
  OTHER
};
using btx_additional_info_traffic_t = std::tuple<int64_t /*ts*/, size_t /*size*/>;
using btx_additional_info_kernel_t = std::string /*metadata*/;
using btx_additional_info =
Expand Down Expand Up @@ -93,7 +95,18 @@ struct data_s {
std::unordered_map<hp_command_queue_t, ze_command_queue_desc_t> commandQueueToDesc;

std::unordered_map<hpt_t, btx_launch_desc_t> threadToLastLaunchInfo;
std::unordered_map<hp_event_t, btx_event_desct_t> eventToBtxDesct;

/* Per-event metadata ring. An hEvent can be the signal event of N
* Appends in one build phase, and the cl can be resubmitted M times,
* yielding M*N result events. We store the N Appends as a vector and
* advance `cursor` per result, wrapping at the end. A new push that
* arrives after the cursor advanced indicates a new build phase —
* we clear and start over so the ring tracks only the current phase. */
struct event_ring_t {
  // Descriptors recorded for this event, in the order they were pushed.
  std::vector<btx_event_desct_t> entries;
  // Index into `entries` of the descriptor the next result will read.
  size_t cursor{0};
};
std::unordered_map<hp_event_t, event_ring_t> eventToBtxDesct;
// Required for non-IMM command lists
std::unordered_map<hp_command_list_t, std::unordered_set<ze_event_handle_t>> commandListToEvents;

Expand Down
6 changes: 5 additions & 1 deletion backends/ze/gen_ze.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <alloca.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
Expand Down Expand Up @@ -228,7 +229,10 @@ def gen_struct_printer(namespace, types)
puts <<EOF
_init_tracer();
EOF
if c.name == 'zeInit'
# _init_tracer_dump() calls the real zeInit (ZE_INIT_PTR) and dumps device
# properties. zesInit piggybacks on it so a pure-Sysman program (no zeInit)
# still initializes the ze backend it depends on.
if %w[zeInit zesInit].include?(c.name)
puts <<EOF
_init_tracer_dump();
EOF
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 10, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 30, err = false }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 30, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# 1 Append, but the underlying cl is Executed twice in a real run, so
# 2 results arrive for the same hEvent. Both are attributed to that one
# Append.
12:00:00.000000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.010000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x1000000000000000 }
12:00:00.020000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x1000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 0, globalEnd: 10, contextStart: 0, contextEnd: 10 }
12:00:00.030000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.010000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.020000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.100000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 0, globalEnd: 10, contextStart: 0, contextEnd: 10 }
12:00:00.200000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 100, globalEnd: 130, contextStart: 100, contextEnd: 130 }
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000100, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000200, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000300, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 10, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000100, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 30, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000200, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 40, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000300, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 50, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# 4 Appends share one hEvent. Each Append's result is attributed back to
# its own Append, in submission order.
12:00:00.000000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.010000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.020000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.100000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.110000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.120000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.200000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.210000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.220000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.300000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.310000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.320000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.400000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 0, globalEnd: 10, contextStart: 0, contextEnd: 10 }
12:00:00.410000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 100, globalEnd: 130, contextStart: 100, contextEnd: 130 }
12:00:00.420000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 200, globalEnd: 240, contextStart: 200, contextEnd: 240 }
12:00:00.430000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 300, globalEnd: 350, contextStart: 300, contextEnd: 350 }
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:host: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000100, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 20, err = false }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 10, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000100, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 30, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000000, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 40, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
lttng:device: { hostname = "testhost", vpid = 10, vtid = 1, ts = 1704110400000000100, backend = 1 }, { name = "zeCommandListAppendBarrier", dur = 50, did = 0, sdid = 0, err = false, metadata = "{ordinal: 0, index: 0}" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# 2 Appends share one hEvent, then the underlying cl is Executed twice,
# so 4 results arrive. Each submission's pair of results is attributed to
# the two Appends in submission order.
12:00:00.000000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.010000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.020000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.100000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_entry: { hCommandList: 0x1000000000000000, hSignalEvent: 0x0000000000000000, numWaitEvents: 0, phWaitEvents: 0x0000000000000000, _phWaitEvents_vals_length: 0, phWaitEvents_vals: 0x0000000000000000 }
12:00:00.110000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling: { hEvent: 0x4000000000000000 }
12:00:00.120000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze:zeCommandListAppendBarrier_exit: { zeResult: ZE_RESULT_SUCCESS }
12:00:00.200000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 0, globalEnd: 10, contextStart: 0, contextEnd: 10 }
12:00:00.210000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 100, globalEnd: 130, contextStart: 100, contextEnd: 130 }
12:00:00.300000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 200, globalEnd: 240, contextStart: 200, contextEnd: 240 }
12:00:00.310000000 - testhost - vpid: 10, vtid: 1 - lttng_ust_ze_profiling:event_profiling_results: { hEvent: 0x4000000000000000, status: ZE_RESULT_SUCCESS, timestampStatus: ZE_RESULT_SUCCESS, globalStart: 300, globalEnd: 350, contextStart: 300, contextEnd: 350 }
Loading
Loading