initial add

This commit is contained in:
2026-01-09 11:11:12 -06:00
commit b8166afbbd
12 changed files with 5716 additions and 0 deletions

3
cell.toml Normal file
View File

@@ -0,0 +1,3 @@
[compilation]
LDFLAGS = "-ltracy"
CFLAGS = "-I/opt/homebrew/include/tracy"

135
tracy.c Normal file
View File

@@ -0,0 +1,135 @@
#include "cell.h"
#define TRACY_ENABLE
#include "TracyC.h"
#define MAX_ZONE_DEPTH 256
typedef struct {
TracyCZoneCtx zone_stack[MAX_ZONE_DEPTH];
int zone_depth;
} tracy_context;
static void tracy_hook(JSContext *js, int type, struct js_debug *dbg, void *user)
{
tracy_context *ctx = (tracy_context *)user;
if (!ctx) return;
if (type == JS_HOOK_CALL) {
if (ctx->zone_depth >= MAX_ZONE_DEPTH) return;
uint64_t srcloc = ___tracy_alloc_srcloc(
dbg->line,
dbg->filename ? dbg->filename : "<unknown>",
dbg->filename ? strlen(dbg->filename) : 9,
dbg->name ? dbg->name : "<anonymous>",
dbg->name ? strlen(dbg->name) : 11,
0
);
ctx->zone_stack[ctx->zone_depth] = ___tracy_emit_zone_begin_alloc(srcloc, 1);
___tracy_emit_zone_color(ctx->zone_stack[ctx->zone_depth], dbg->unique);
ctx->zone_depth++;
}
else if (type == JS_HOOK_RET) {
if (ctx->zone_depth <= 0) return;
ctx->zone_depth--;
___tracy_emit_zone_end(ctx->zone_stack[ctx->zone_depth]);
}
}
static void tracy_cell_hook(const char *name, int type)
{
printf("%s\n", name);
#ifdef TRACY_FIBERS
if (type == CELL_HOOK_ENTER)
___tracy_fiber_enter(name);
else if (type == CELL_HOOK_EXIT)
___tracy_fiber_leave();
#endif
}
static JSValue js_tracy_init(JSContext *js, JSValue self, int argc, JSValue *argv)
{
tracy_context *ctx = malloc(sizeof(tracy_context));
if (!ctx) return JS_EXCEPTION;
ctx->zone_depth = 0;
memset(ctx->zone_stack, 0, sizeof(ctx->zone_stack));
js_debug_sethook(js, tracy_hook, JS_HOOK_CALL | JS_HOOK_RET, ctx);
cell_trace_sethook(tracy_cell_hook);
return JS_NULL;
}
static JSValue js_tracy_frame(JSContext *js, JSValue self, int argc, JSValue *argv)
{
___tracy_emit_frame_mark(NULL);
return JS_NULL;
}
static JSValue js_tracy_message(JSContext *js, JSValue self, int argc, JSValue *argv)
{
if (argc < 1) return JS_NULL;
const char *msg = JS_ToCString(js, argv[0]);
if (!msg) return JS_EXCEPTION;
___tracy_emit_messageL(msg, 0);
JS_FreeCString(js, msg);
return JS_NULL;
}
static JSValue js_tracy_image(JSContext *js, JSValue self, int argc, JSValue *argv)
{
if (argc < 3) return JS_NULL;
size_t size;
void *data = js_get_blob_data(js, &size, argv[0]);
if (!data) return JS_EXCEPTION;
uint16_t w = (uint16_t)js2number(js, argv[1]);
uint16_t h = (uint16_t)js2number(js, argv[2]);
uint8_t offset = argc > 3 ? (uint8_t)js2number(js, argv[3]) : 0;
int32_t flip = argc > 4 ? js2bool(js, argv[4]) : 0;
___tracy_emit_frame_image(data, w, h, offset, flip);
return JS_NULL;
}
static JSValue js_tracy_appinfo(JSContext *js, JSValue self, int argc, JSValue *argv)
{
if (argc < 1) return JS_NULL;
const char *txt = JS_ToCString(js, argv[0]);
if (!txt) return JS_EXCEPTION;
___tracy_emit_message_appinfo(txt, strlen(txt));
JS_FreeCString(js, txt);
return JS_NULL;
}
static JSValue js_tracy_begin_sampling(JSContext *js, JSValue self, int argc, JSValue *argv)
{
// int result = ___tracy_begin_sampling_profiler();
// return JS_NewInt32(js, result);
return JS_NULL;
}
static JSValue js_tracy_end_sampling(JSContext *js, JSValue self, int argc, JSValue *argv)
{
// ___tracy_end_sampling_profiler();
return JS_NULL;
}
static const JSCFunctionListEntry js_tracy_funcs[] = {
JS_CFUNC_DEF("init", 0, js_tracy_init),
JS_CFUNC_DEF("frame", 0, js_tracy_frame),
JS_CFUNC_DEF("message", 1, js_tracy_message),
JS_CFUNC_DEF("image", 1, js_tracy_image),
JS_CFUNC_DEF("appinfo", 1, js_tracy_appinfo),
JS_CFUNC_DEF("beginSampling", 0, js_tracy_begin_sampling),
JS_CFUNC_DEF("endSampling", 0, js_tracy_end_sampling),
};
CELL_USE_FUNCS(js_tracy_funcs)

278
tracy/Tracy.hpp Normal file
View File

@@ -0,0 +1,278 @@
#ifndef __TRACY_HPP__
#define __TRACY_HPP__
#include "../common/TracyColor.hpp"
#include "../common/TracySystem.hpp"
#ifndef TracyFunction
# define TracyFunction __FUNCTION__
#endif
#ifndef TracyFile
# define TracyFile __FILE__
#endif
#ifndef TracyLine
# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665
#endif
#ifndef TRACY_ENABLE
#define TracyNoop
#define ZoneNamed(x,y)
#define ZoneNamedN(x,y,z)
#define ZoneNamedC(x,y,z)
#define ZoneNamedNC(x,y,z,w)
#define ZoneTransient(x,y)
#define ZoneTransientN(x,y,z)
#define ZoneScoped
#define ZoneScopedN(x)
#define ZoneScopedC(x)
#define ZoneScopedNC(x,y)
#define ZoneText(x,y)
#define ZoneTextV(x,y,z)
#define ZoneTextF(x,...)
#define ZoneTextVF(x,y,...)
#define ZoneName(x,y)
#define ZoneNameV(x,y,z)
#define ZoneNameF(x,...)
#define ZoneNameVF(x,y,...)
#define ZoneColor(x)
#define ZoneColorV(x,y)
#define ZoneValue(x)
#define ZoneValueV(x,y)
#define ZoneIsActive false
#define ZoneIsActiveV(x) false
#define FrameMark
#define FrameMarkNamed(x)
#define FrameMarkStart(x)
#define FrameMarkEnd(x)
#define FrameImage(x,y,z,w,a)
#define TracyLockable( type, varname ) type varname
#define TracyLockableN( type, varname, desc ) type varname
#define TracySharedLockable( type, varname ) type varname
#define TracySharedLockableN( type, varname, desc ) type varname
#define LockableBase( type ) type
#define SharedLockableBase( type ) type
#define LockMark(x) (void)x
#define LockableName(x,y,z)
#define TracyPlot(x,y)
#define TracyPlotConfig(x,y,z,w,a)
#define TracyMessage(x,y)
#define TracyMessageL(x)
#define TracyMessageC(x,y,z)
#define TracyMessageLC(x,y)
#define TracyAppInfo(x,y)
#define TracyAlloc(x,y)
#define TracyFree(x)
#define TracyMemoryDiscard(x)
#define TracySecureAlloc(x,y)
#define TracySecureFree(x)
#define TracySecureMemoryDiscard(x)
#define TracyAllocN(x,y,z)
#define TracyFreeN(x,y)
#define TracySecureAllocN(x,y,z)
#define TracySecureFreeN(x,y)
#define ZoneNamedS(x,y,z)
#define ZoneNamedNS(x,y,z,w)
#define ZoneNamedCS(x,y,z,w)
#define ZoneNamedNCS(x,y,z,w,a)
#define ZoneTransientS(x,y,z)
#define ZoneTransientNS(x,y,z,w)
#define ZoneScopedS(x)
#define ZoneScopedNS(x,y)
#define ZoneScopedCS(x,y)
#define ZoneScopedNCS(x,y,z)
#define TracyAllocS(x,y,z)
#define TracyFreeS(x,y)
#define TracyMemoryDiscardS(x,y)
#define TracySecureAllocS(x,y,z)
#define TracySecureFreeS(x,y)
#define TracySecureMemoryDiscardS(x,y)
#define TracyAllocNS(x,y,z,w)
#define TracyFreeNS(x,y,z)
#define TracySecureAllocNS(x,y,z,w)
#define TracySecureFreeNS(x,y,z)
#define TracyMessageS(x,y,z)
#define TracyMessageLS(x,y)
#define TracyMessageCS(x,y,z,w)
#define TracyMessageLCS(x,y,z)
#define TracySourceCallbackRegister(x,y)
#define TracyParameterRegister(x,y)
#define TracyParameterSetup(x,y,z,w)
#define TracyIsConnected false
#define TracyIsStarted false
#define TracySetProgramName(x)
#define TracyFiberEnter(x)
#define TracyFiberEnterHint(x,y)
#define TracyFiberLeave
#else
#include <string.h>
#include "../client/TracyLock.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyScoped.hpp"
#ifndef TRACY_CALLSTACK
#define TRACY_CALLSTACK 0
#endif
#define TracyNoop tracy::ProfilerAvailable()
#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
#if defined(TRACY_ALLOW_SHADOW_WARNING)
#define SuppressVarShadowWarning(Expr) Expr
#elif defined(__clang__)
#define SuppressVarShadowWarning(Expr) \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Wshadow\"") \
Expr; \
_Pragma("clang diagnostic pop")
#elif defined(__GNUC__)
#define SuppressVarShadowWarning(Expr) \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wshadow\"") \
Expr; \
_Pragma("GCC diagnostic pop")
#elif defined(_MSC_VER)
#define SuppressVarShadowWarning(Expr) \
_Pragma("warning(push)") \
_Pragma("warning(disable : 4456)") \
Expr; \
_Pragma("warning(pop)")
#else
#define SuppressVarShadowWarning(Expr) Expr
#endif
#define ZoneScoped SuppressVarShadowWarning( ZoneNamed( ___tracy_scoped_zone, true ) )
#define ZoneScopedN( name ) SuppressVarShadowWarning( ZoneNamedN( ___tracy_scoped_zone, name, true ) )
#define ZoneScopedC( color ) SuppressVarShadowWarning( ZoneNamedC( ___tracy_scoped_zone, color, true ) )
#define ZoneScopedNC( name, color ) SuppressVarShadowWarning( ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) )
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size )
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size )
#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ )
#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ )
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size )
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size )
#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ )
#define ZoneNameVF( varname, fmt, ... ) varname.NameFmt( fmt, ##__VA_ARGS__ )
#define ZoneColor( color ) ___tracy_scoped_zone.Color( color )
#define ZoneColorV( varname, color ) varname.Color( color )
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value )
#define ZoneValueV( varname, value ) varname.Value( value )
#define ZoneIsActive ___tracy_scoped_zone.IsActive()
#define ZoneIsActiveV( varname ) varname.IsActive()
#define FrameMark tracy::Profiler::SendFrameMark( nullptr )
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name )
#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart )
#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd )
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip )
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_lock_location_,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &TracyConcat(__tracy_lock_location_,TracyLine) )
#define LockableName( varname, txt, size ) varname.CustomName( txt, size )
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val )
#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color )
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size )
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK )
#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK )
#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth )
#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth )
#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data )
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val )
#define TracyIsConnected tracy::GetProfiler().IsConnected()
#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name );
#ifdef TRACY_FIBERS
# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 )
# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint )
# define TracyFiberLeave tracy::Profiler::LeaveFiber()
#endif
#endif
#endif

393
tracy/TracyC.h Normal file
View File

@@ -0,0 +1,393 @@
#ifndef __TRACYC_HPP__
#define __TRACYC_HPP__
#include <stddef.h>
#include <stdint.h>
#include "../common/TracyApi.h"
#ifdef __cplusplus
extern "C" {
#endif
enum TracyPlotFormatEnum
{
TracyPlotFormatNumber,
TracyPlotFormatMemory,
TracyPlotFormatPercentage,
TracyPlotFormatWatt
};
TRACY_API void ___tracy_set_thread_name( const char* name );
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
#ifndef TracyFunction
# define TracyFunction __FUNCTION__
#endif
#ifndef TracyFile
# define TracyFile __FILE__
#endif
#ifndef TracyLine
# define TracyLine __LINE__
#endif
#ifndef TRACY_ENABLE
typedef const void* TracyCZoneCtx;
typedef const void* TracyCLockCtx;
#define TracyCZone(c,x)
#define TracyCZoneN(c,x,y)
#define TracyCZoneC(c,x,y)
#define TracyCZoneNC(c,x,y,z)
#define TracyCZoneEnd(c)
#define TracyCZoneText(c,x,y)
#define TracyCZoneName(c,x,y)
#define TracyCZoneColor(c,x)
#define TracyCZoneValue(c,x)
#define TracyCAlloc(x,y)
#define TracyCFree(x)
#define TracyCMemoryDiscard(x)
#define TracyCSecureAlloc(x,y)
#define TracyCSecureFree(x)
#define TracyCSecureMemoryDiscard(x)
#define TracyCAllocN(x,y,z)
#define TracyCFreeN(x,y)
#define TracyCSecureAllocN(x,y,z)
#define TracyCSecureFreeN(x,y)
#define TracyCFrameMark
#define TracyCFrameMarkNamed(x)
#define TracyCFrameMarkStart(x)
#define TracyCFrameMarkEnd(x)
#define TracyCFrameImage(x,y,z,w,a)
#define TracyCPlot(x,y)
#define TracyCPlotF(x,y)
#define TracyCPlotI(x,y)
#define TracyCPlotConfig(x,y,z,w,a)
#define TracyCMessage(x,y)
#define TracyCMessageL(x)
#define TracyCMessageC(x,y,z)
#define TracyCMessageLC(x,y)
#define TracyCAppInfo(x,y)
#define TracyCZoneS(x,y,z)
#define TracyCZoneNS(x,y,z,w)
#define TracyCZoneCS(x,y,z,w)
#define TracyCZoneNCS(x,y,z,w,a)
#define TracyCAllocS(x,y,z)
#define TracyCFreeS(x,y)
#define TracyCMemoryDiscardS(x,y)
#define TracyCSecureAllocS(x,y,z)
#define TracyCSecureFreeS(x,y)
#define TracyCSecureMemoryDiscardS(x,y)
#define TracyCAllocNS(x,y,z,w)
#define TracyCFreeNS(x,y,z)
#define TracyCSecureAllocNS(x,y,z,w)
#define TracyCSecureFreeNS(x,y,z)
#define TracyCMessageS(x,y,z)
#define TracyCMessageLS(x,y)
#define TracyCMessageCS(x,y,z,w)
#define TracyCMessageLCS(x,y,z)
#define TracyCLockCtx(l)
#define TracyCLockAnnounce(l)
#define TracyCLockTerminate(l)
#define TracyCLockBeforeLock(l)
#define TracyCLockAfterLock(l)
#define TracyCLockAfterUnlock(l)
#define TracyCLockAfterTryLock(l,x)
#define TracyCLockMark(l)
#define TracyCLockCustomName(l,x,y)
#define TracyCIsConnected 0
#define TracyCIsStarted 0
#define TracyCBeginSamplingProfiling() 0
#define TracyCEndSamplingProfiling()
#ifdef TRACY_FIBERS
# define TracyCFiberEnter(fiber)
# define TracyCFiberLeave
#endif
#else
#ifndef TracyConcat
# define TracyConcat(x,y) TracyConcatIndirect(x,y)
#endif
#ifndef TracyConcatIndirect
# define TracyConcatIndirect(x,y) x##y
#endif
struct ___tracy_source_location_data
{
const char* name;
const char* function;
const char* file;
uint32_t line;
uint32_t color;
};
struct ___tracy_c_zone_context
{
uint32_t id;
int32_t active;
};
struct ___tracy_gpu_time_data
{
int64_t gpuTime;
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_zone_begin_data {
uint64_t srcloc;
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_zone_begin_callstack_data {
uint64_t srcloc;
int32_t depth;
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_zone_end_data {
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_new_context_data {
int64_t gpuTime;
float period;
uint8_t context;
uint8_t flags;
uint8_t type;
};
struct ___tracy_gpu_context_name_data {
uint8_t context;
const char* name;
uint16_t len;
};
struct ___tracy_gpu_calibration_data {
int64_t gpuTime;
int64_t cpuDelta;
uint8_t context;
};
struct ___tracy_gpu_time_sync_data {
int64_t gpuTime;
uint8_t context;
};
struct __tracy_lockable_context_data;
// Some containers don't support storing const types.
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
typedef struct __tracy_lockable_context_data* TracyCLockCtx;
#ifdef TRACY_MANUAL_LIFETIME
TRACY_API void ___tracy_startup_profiler(void);
TRACY_API void ___tracy_shutdown_profiler(void);
TRACY_API int32_t ___tracy_profiler_started(void);
# define TracyCIsStarted ___tracy_profiler_started()
#else
# define TracyCIsStarted 1
#endif
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active );
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color );
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data );
TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data );
TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data );
TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data );
TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data );
TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data );
TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data );
TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data );
TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data );
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data );
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data );
TRACY_API int32_t ___tracy_connected(void);
#ifndef TRACY_CALLSTACK
#define TRACY_CALLSTACK 0
#endif
#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color );
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure );
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure );
TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth );
#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK );
#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK );
#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
TRACY_API void ___tracy_emit_frame_mark( const char* name );
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip );
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
#define TracyCFrameMarkStart( name ) ___tracy_emit_frame_mark_start( name );
#define TracyCFrameMarkEnd( name ) ___tracy_emit_frame_mark_end( name );
#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip );
TRACY_API void ___tracy_emit_plot( const char* name, double val );
TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color );
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
#define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val );
#define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val );
#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color );
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth )
#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth )
#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc );
TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired );
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) );
#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock );
#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock );
#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock );
#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock );
#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired );
#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) );
#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz );
#define TracyCIsConnected ___tracy_connected()
TRACY_API int ___tracy_begin_sampling_profiler( void );
TRACY_API void ___tracy_end_sampling_profiler( void );
#define TracyCBeginSamplingProfiling() ___tracy_begin_sampling_profiling()
#define TracyCEndSamplingProfiling() ___tracy_end_sampling_profiling()
#ifdef TRACY_FIBERS
TRACY_API void ___tracy_fiber_enter( const char* fiber );
TRACY_API void ___tracy_fiber_leave( void );
# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber );
# define TracyCFiberLeave ___tracy_fiber_leave();
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif

1325
tracy/TracyCUDA.hpp Normal file

File diff suppressed because it is too large Load Diff

466
tracy/TracyD3D11.hpp Normal file
View File

@@ -0,0 +1,466 @@
#ifndef __TRACYD3D11_HPP__
#define __TRACYD3D11_HPP__
#ifndef TRACY_ENABLE
#define TracyD3D11Context(device,queue) nullptr
#define TracyD3D11Destroy(ctx)
#define TracyD3D11ContextName(ctx, name, size)
#define TracyD3D11NewFrame(ctx)
#define TracyD3D11Zone(ctx, name)
#define TracyD3D11ZoneC(ctx, name, color)
#define TracyD3D11NamedZone(ctx, varname, name, active)
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
#define TracyD3D11ZoneTransient(ctx, varname, name, active)
#define TracyD3D11ZoneS(ctx, name, depth)
#define TracyD3D11ZoneCS(ctx, name, color, depth)
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active)
#define TracyD3D11Collect(ctx)
namespace tracy
{
class D3D11ZoneScope {};
}
using TracyD3D11Ctx = void*;
#else
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyYield.hpp"
#include <d3d11.h>
#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
namespace tracy
{
class D3D11Ctx
{
friend class D3D11ZoneScope;
static constexpr uint32_t MaxQueries = 64 * 1024;
enum CollectMode { POLL, BLOCK };
public:
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
// TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument
m_device = device;
device->AddRef();
m_immediateDevCtx = devicectx;
devicectx->AddRef();
{
D3D11_QUERY_DESC desc = { };
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery)))
{
TracyD3D11Panic("unable to create disjoint timestamp query.", return);
}
}
for (ID3D11Query*& query : m_queries)
{
D3D11_QUERY_DESC desc = { };
desc.Query = D3D11_QUERY_TIMESTAMP;
if (FAILED(m_device->CreateQuery(&desc, &query)))
{
TracyD3D11Panic("unable to create timestamp query.", return);
}
}
// Calibrate CPU and GPU timestamps
int64_t tcpu = 0;
int64_t tgpu = 0;
for (int attempts = 0; attempts < 50; attempts++)
{
m_immediateDevCtx->Begin(m_disjointQuery);
m_immediateDevCtx->End(m_queries[0]);
m_immediateDevCtx->End(m_disjointQuery);
int64_t tcpu0 = Profiler::GetTime();
WaitForQuery(m_disjointQuery);
// NOTE: one would expect that by waiting for the enclosing disjoint query to finish,
// all timestamp queries within would also be readily available, but that does not
// seem to be the case here... See https://github.com/wolfpld/tracy/issues/947
WaitForQuery(m_queries[0]);
int64_t tcpu1 = Profiler::GetTime();
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK)
{
TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato);
continue;
}
if (disjoint.Disjoint)
continue;
UINT64 timestamp = 0;
if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK)
continue; // this should never happen (we waited for the query to finish above)
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
tgpu = timestamp * (1000000000 / disjoint.Frequency);
break;
}
// ready to roll
m_contextId = GetGpuCtxCounter().fetch_add(1);
m_immediateDevCtx->Begin(m_disjointQuery);
m_previousCheckpoint = m_nextCheckpoint = 0;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()?
MemWrite( &item->gpuNewContext.period, 1.0f );
MemWrite( &item->gpuNewContext.context, m_contextId);
MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
~D3D11Ctx()
{
// collect all pending timestamps before destroying everything
do
{
Collect(BLOCK);
} while (m_previousCheckpoint != m_queryCounter);
for (ID3D11Query* query : m_queries)
{
query->Release();
}
m_immediateDevCtx->End(m_disjointQuery);
m_disjointQuery->Release();
m_immediateDevCtx->Release();
m_device->Release();
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect(CollectMode mode = POLL)
{
ZoneScopedC( Color::Red4 );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
m_previousCheckpoint = m_nextCheckpoint = m_queryCounter;
return;
}
#endif
if (m_previousCheckpoint == m_nextCheckpoint)
{
uintptr_t nextCheckpoint = m_queryCounter;
if (nextCheckpoint == m_nextCheckpoint)
{
return;
}
m_nextCheckpoint = nextCheckpoint;
m_immediateDevCtx->End(m_disjointQuery);
}
if (mode == CollectMode::BLOCK)
{
WaitForQuery(m_disjointQuery);
}
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK)
{
return;
}
if (disjoint.Disjoint == TRUE)
{
m_previousCheckpoint = m_nextCheckpoint;
TracyD3D11Panic("disjoint timestamps detected; dropping.");
return;
}
auto begin = m_previousCheckpoint;
auto end = m_nextCheckpoint;
for (auto i = begin; i != end; ++i)
{
uint32_t k = RingIndex(i);
UINT64 timestamp = 0;
if (m_immediateDevCtx->GetData(m_queries[k], &timestamp, sizeof(timestamp), 0) != S_OK)
{
TracyD3D11Panic("timestamp expected to be ready, but it was not!");
break;
}
timestamp *= (1000000000ull / disjoint.Frequency);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(timestamp));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
// disjoint timestamp queries should only be invoked once per frame or less
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
m_immediateDevCtx->Begin(m_disjointQuery);
m_previousCheckpoint = m_nextCheckpoint;
}
private:
tracy_force_inline uint32_t RingIndex(uintptr_t index)
{
index %= MaxQueries;
return static_cast<uint32_t>(index);
}
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
{
// wrap-around safe: all unsigned
uintptr_t count = end - begin;
return static_cast<uint32_t>(count);
}
tracy_force_inline uint32_t NextQueryId()
{
auto id = m_queryCounter++;
if (RingCount(m_previousCheckpoint, id) >= MaxQueries)
{
TracyD3D11Panic("too many pending timestamp queries.");
// #TODO: return some sentinel value; ideally a "hidden" query index
}
return RingIndex(id);
}
tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id)
{
return m_queries[id];
}
tracy_force_inline void WaitForQuery(ID3D11Query* query)
{
m_immediateDevCtx->Flush();
while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK)
YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends...
}
tracy_force_inline uint8_t GetContextId() const
{
return m_contextId;
}
ID3D11Device* m_device = nullptr;
ID3D11DeviceContext* m_immediateDevCtx = nullptr;
ID3D11Query* m_queries[MaxQueries];
ID3D11Query* m_disjointQuery = nullptr;
uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere?
uintptr_t m_queryCounter = 0;
uintptr_t m_previousCheckpoint = 0;
uintptr_t m_nextCheckpoint = 0;
};
class D3D11ZoneScope
{
public:
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active )
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
}
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active )
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
if( depth > 0 && has_callstack() )
{
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
}
else
{
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
}
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active)
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
if ( depth > 0 && has_callstack() )
{
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
}
else
{
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
}
tracy_force_inline ~D3D11ZoneScope()
{
if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId();
m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId));
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() );
Profiler::QueueSerialFinish();
}
private:
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active )
#ifdef TRACY_ON_DEMAND
: m_active( active && GetProfiler().IsConnected() )
#else
: m_active( active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
}
void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation)
{
const auto queryId = m_ctx->NextQueryId();
m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId));
MemWrite( &item->hdr.type, queueItemType);
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() );
Profiler::QueueSerialFinish();
}
const bool m_active;
D3D11Ctx* m_ctx;
};
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
new(ctx) D3D11Ctx( device, devicectx );
return ctx;
}
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
{
ctx->~D3D11Ctx();
tracy_free( ctx );
}
}
#undef TracyD3D11Panic
using TracyD3D11Ctx = tracy::D3D11Ctx*;
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
#define TracyD3D11UnnamedZone ___tracy_gpu_d3d11_zone
#define TracyD3D11SrcLocSymbol TracyConcat(__tracy_gpu_d3d11_source_location,TracyLine)
#define TracyD3D11SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D11SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, TRACY_CALLSTACK, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, TRACY_CALLSTACK, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, TracyD3D11UnnamedZone, name, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, TracyD3D11UnnamedZone, name, color, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), active };
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, depth, true )
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, depth, true )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), depth, active };
#else
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active)
#endif
#define TracyD3D11Collect( ctx ) ctx->Collect();
#endif
#endif

500
tracy/TracyD3D12.hpp Normal file
View File

@@ -0,0 +1,500 @@
#ifndef __TRACYD3D12_HPP__
#define __TRACYD3D12_HPP__
#ifndef TRACY_ENABLE
#define TracyD3D12Context(device, queue) nullptr
#define TracyD3D12Destroy(ctx)
#define TracyD3D12ContextName(ctx, name, size)
#define TracyD3D12NewFrame(ctx)
#define TracyD3D12Zone(ctx, cmdList, name)
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12Collect(ctx)
namespace tracy
{
class D3D12ZoneScope {};
}
using TracyD3D12Ctx = void*;
#else
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include <cstdlib>
#include <cassert>
#include <d3d12.h>
#include <dxgi.h>
#include <queue>
#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
namespace tracy
{
struct D3D12QueryPayload
{
uint32_t m_queryIdStart = 0;
uint32_t m_queryCount = 0;
};
// Command queue context.
class D3D12QueueCtx
{
friend class D3D12ZoneScope;
ID3D12Device* m_device = nullptr;
ID3D12CommandQueue* m_queue = nullptr;
uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere?
ID3D12QueryHeap* m_queryHeap = nullptr;
ID3D12Resource* m_readbackBuffer = nullptr;
// In-progress payload.
uint32_t m_queryLimit = 0;
std::atomic<uint32_t> m_queryCounter = 0;
uint32_t m_previousQueryCounter = 0;
uint32_t m_activePayload = 0;
ID3D12Fence* m_payloadFence = nullptr;
std::queue<D3D12QueryPayload> m_payloadQueue;
UINT64 m_prevCalibrationTicksCPU = 0;
void RecalibrateClocks()
{
UINT64 cpuTimestamp;
UINT64 gpuTimestamp;
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
{
TracyD3D12Panic("failed to obtain queue clock calibration counters.", return);
}
int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU;
if (cpuDeltaTicks > 0)
{
static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc();
int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick;
// Save the device cpu timestamp, not the Tracy profiler timestamp:
m_prevCalibrationTicksCPU = cpuTimestamp;
cpuTimestamp = Profiler::GetTime();
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS);
MemWrite(&item->gpuCalibration.context, GetId());
SubmitQueueItem(item);
}
}
tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
{
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
}
public:
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
: m_device(device)
, m_queue(queue)
{
// Verify we support timestamp queries on this queue.
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
{
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData));
if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE))
{
TracyD3D12Panic("Platform does not support profiling of copy queues.", return);
}
}
static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs
m_queryLimit = MaxQueries;
D3D12_QUERY_HEAP_DESC heapDesc{};
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
heapDesc.Count = m_queryLimit;
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
{
m_queryLimit /= 2;
heapDesc.Count = m_queryLimit;
}
// Create a readback buffer, which will be used as a destination for the query data.
D3D12_RESOURCE_DESC readbackBufferDesc{};
readbackBufferDesc.Alignment = 0;
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
readbackBufferDesc.Height = 1;
readbackBufferDesc.DepthOrArraySize = 1;
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
readbackBufferDesc.MipLevels = 1;
readbackBufferDesc.SampleDesc.Count = 1;
readbackBufferDesc.SampleDesc.Quality = 0;
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
D3D12_HEAP_PROPERTIES readbackHeapProps{};
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
readbackHeapProps.CreationNodeMask = 0;
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
{
TracyD3D12Panic("Failed to create query readback buffer.", return);
}
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
{
TracyD3D12Panic("Failed to create payload fence.", return);
}
float period = [queue]()
{
uint64_t timestampFrequency;
if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
{
return 0.0f;
}
return static_cast<float>( 1E+09 / static_cast<double>(timestampFrequency) );
}();
if (period == 0.0f)
{
TracyD3D12Panic("Failed to get timestamp frequency.", return);
}
uint64_t cpuTimestamp;
uint64_t gpuTimestamp;
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
{
TracyD3D12Panic("Failed to get queue clock calibration.", return);
}
// Save the device cpu timestamp, not the profiler's timestamp.
m_prevCalibrationTicksCPU = cpuTimestamp;
cpuTimestamp = Profiler::GetTime();
// all checked: ready to roll
m_contextId = GetGpuCtxCounter().fetch_add(1);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()?
MemWrite(&item->gpuNewContext.period, period);
MemWrite(&item->gpuNewContext.context, GetId());
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
SubmitQueueItem(item);
}
~D3D12QueueCtx()
{
ZoneScopedC(Color::Red4);
// collect all pending timestamps
while (m_payloadFence->GetCompletedValue() != m_activePayload)
/* busy-wait ... */;
Collect();
m_payloadFence->Release();
m_readbackBuffer->Release();
m_queryHeap->Release();
}
void NewFrame()
{
uint32_t queryCounter = m_queryCounter.exchange(0);
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter });
m_previousQueryCounter += queryCounter;
if (m_previousQueryCounter >= m_queryLimit)
{
m_previousQueryCounter -= m_queryLimit;
}
m_queue->Signal(m_payloadFence, ++m_activePayload);
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, GetId());
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
SubmitQueueItem(item);
}
void Collect()
{
ZoneScopedC(Color::Red4);
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
m_queryCounter = 0;
return;
}
#endif
// Find out what payloads are available.
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
if (!payloadCount)
{
return; // No payloads are available yet, exit out.
}
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
// Map the readback buffer so we can fetch the query data from the GPU.
void* readbackBufferMapping = nullptr;
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
{
TracyD3D12Panic("Failed to map readback buffer.", return);
}
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
for (uint32_t i = 0; i < payloadCount; ++i)
{
const auto& payload = m_payloadQueue.front();
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
{
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
const auto timestamp = timestampData[counter];
const auto queryId = counter;
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, timestamp);
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuTime.context, GetId());
Profiler::QueueSerialFinish();
}
m_payloadQueue.pop();
}
m_readbackBuffer->Unmap(0, nullptr);
// Recalibrate to account for drift.
RecalibrateClocks();
}
private:
tracy_force_inline uint32_t NextQueryId()
{
uint32_t queryCounter = m_queryCounter.fetch_add(2);
if (queryCounter >= m_queryLimit)
{
TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries.");
// #TODO: consider returning an invalid id or sentinel value here
}
const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit;
return id;
}
tracy_force_inline uint8_t GetId() const
{
return m_contextId;
}
};
class D3D12ZoneScope
{
const bool m_active;
D3D12QueueCtx* m_ctx = nullptr;
ID3D12GraphicsCommandList* m_cmdList = nullptr;
uint32_t m_queryId = 0; // Used for tracking in nested zones.
tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation)
{
MemWrite(&item->hdr.type, type);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, srcLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if (!m_active) return;
m_ctx = ctx;
m_cmdList = cmdList;
m_queryId = m_ctx->NextQueryId();
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
}
public:
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcLocation));
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
}
tracy_force_inline ~D3D12ZoneScope()
{
if (!m_active) return;
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
Profiler::QueueSerialFinish();
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t));
}
};
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
new (ctx) D3D12QueueCtx{ device, queue };
return ctx;
}
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
{
ctx->~D3D12QueueCtx();
tracy_free(ctx);
}
}
#undef TracyD3D12Panic
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone
#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine)
#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active };
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true)
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active };
#else
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#endif
#define TracyD3D12Collect(ctx) ctx->Collect();
#endif
#endif

486
tracy/TracyLua.hpp Normal file
View File

@@ -0,0 +1,486 @@
#ifndef __TRACYLUA_HPP__
#define __TRACYLUA_HPP__
// Include this file after you include lua headers.
#ifndef TRACY_ENABLE
#include <string.h>
namespace tracy
{
namespace detail
{
static inline int noop( lua_State* L ) { return 0; }
}
static inline void LuaRegister( lua_State* L )
{
lua_newtable( L );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBegin" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBeginN" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBeginS" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBeginNS" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneEnd" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneText" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneName" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "Message" );
lua_setglobal( L, "tracy" );
}
static inline char* FindEnd( char* ptr )
{
unsigned int cnt = 1;
while( cnt != 0 )
{
if( *ptr == '(' ) cnt++;
else if( *ptr == ')' ) cnt--;
ptr++;
}
return ptr;
}
static inline void LuaRemove( char* script )
{
while( *script )
{
if( strncmp( script, "tracy.", 6 ) == 0 )
{
if( strncmp( script + 6, "Zone", 4 ) == 0 )
{
if( strncmp( script + 10, "End()", 5 ) == 0 )
{
memset( script, ' ', 15 );
script += 15;
}
else if( strncmp( script + 10, "Begin()", 7 ) == 0 )
{
memset( script, ' ', 17 );
script += 17;
}
else if( strncmp( script + 10, "Text(", 5 ) == 0 )
{
auto end = FindEnd( script + 15 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "Name(", 5 ) == 0 )
{
auto end = FindEnd( script + 15 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "BeginN(", 7 ) == 0 )
{
auto end = FindEnd( script + 17 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "BeginS(", 7 ) == 0 )
{
auto end = FindEnd( script + 17 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "BeginNS(", 8 ) == 0 )
{
auto end = FindEnd( script + 18 );
memset( script, ' ', end - script );
script = end;
}
else
{
script += 10;
}
}
else if( strncmp( script + 6, "Message(", 8 ) == 0 )
{
auto end = FindEnd( script + 14 );
memset( script, ' ', end - script );
script = end;
}
else
{
script += 6;
}
}
else
{
script++;
}
}
}
static inline void LuaHook( lua_State* L, lua_Debug* ar ) {}
}
#else
#include <assert.h>
#include <limits>
#include "../common/TracyColor.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracySystem.hpp"
#include "../client/TracyProfiler.hpp"
namespace tracy
{
#ifdef TRACY_ON_DEMAND
TRACY_API LuaZoneState& GetLuaZoneState();
#endif
namespace detail
{
static inline void LuaShortenSrc( char* dst, const char* src )
{
size_t l = std::min( (size_t)255, strlen( src ) );
memcpy( dst, src, l );
dst[l] = 0;
}
#ifdef TRACY_HAS_CALLSTACK
static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
{
assert( depth <= 64 );
lua_Debug dbg[64];
const char* func[64];
uint32_t fsz[64];
uint32_t ssz[64];
uint8_t cnt;
uint16_t spaceNeeded = sizeof( cnt );
for( cnt=0; cnt<depth; cnt++ )
{
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
lua_getinfo( L, "Snl", dbg+cnt );
func[cnt] = dbg[cnt].name ? dbg[cnt].name : dbg[cnt].short_src;
fsz[cnt] = uint32_t( strlen( func[cnt] ) );
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
spaceNeeded += fsz[cnt] + ssz[cnt];
}
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
auto dst = ptr;
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
memcpy( dst, &cnt, 1 ); dst++;
for( uint8_t i=0; i<cnt; i++ )
{
const uint32_t line = dbg[i].currentline;
memcpy( dst, &line, 4 ); dst += 4;
assert( fsz[i] <= (std::numeric_limits<uint16_t>::max)() );
memcpy( dst, fsz+i, 2 ); dst += 2;
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
assert( ssz[i] <= (std::numeric_limits<uint16_t>::max)() );
memcpy( dst, ssz+i, 2 ); dst += 2;
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
}
assert( dst - ptr == spaceNeeded + 2 );
TracyQueuePrepare( QueueType::CallstackAlloc );
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
TracyQueueCommit( callstackAllocFatThread );
}
static inline int LuaZoneBeginS( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
GetLuaZoneState().active = GetProfiler().IsConnected();
if( !GetLuaZoneState().active ) return 0;
#endif
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
#else
const auto depth = uint32_t( lua_tointeger( L, 1 ) );
#endif
SendLuaCallstack( L, depth );
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
char src[256];
LuaShortenSrc( src, dbg.source );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
}
static inline int LuaZoneBeginNS( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
GetLuaZoneState().active = GetProfiler().IsConnected();
if( !GetLuaZoneState().active ) return 0;
#endif
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
#else
const auto depth = uint32_t( lua_tointeger( L, 2 ) );
#endif
SendLuaCallstack( L, depth );
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
size_t nsz;
char src[256];
LuaShortenSrc( src, dbg.source );
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
}
#endif
static inline int LuaZoneBegin( lua_State* L )
{
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
return LuaZoneBeginS( L );
#else
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
GetLuaZoneState().active = GetProfiler().IsConnected();
if( !GetLuaZoneState().active ) return 0;
#endif
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
char src[256];
LuaShortenSrc( src, dbg.source );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
#endif
}
static inline int LuaZoneBeginN( lua_State* L )
{
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
return LuaZoneBeginNS( L );
#else
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
GetLuaZoneState().active = GetProfiler().IsConnected();
if( !GetLuaZoneState().active ) return 0;
#endif
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
size_t nsz;
char src[256];
LuaShortenSrc( src, dbg.source );
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
#endif
}
static inline int LuaZoneEnd( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
assert( GetLuaZoneState().counter != 0 );
GetLuaZoneState().counter--;
if( !GetLuaZoneState().active ) return 0;
if( !GetProfiler().IsConnected() )
{
GetLuaZoneState().active = false;
return 0;
}
#endif
TracyQueuePrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
TracyQueueCommit( zoneEndThread );
return 0;
}
static inline int LuaZoneText( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !GetLuaZoneState().active ) return 0;
if( !GetProfiler().IsConnected() )
{
GetLuaZoneState().active = false;
return 0;
}
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < (std::numeric_limits<uint16_t>::max)() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneText );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
return 0;
}
static inline int LuaZoneName( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !GetLuaZoneState().active ) return 0;
if( !GetProfiler().IsConnected() )
{
GetLuaZoneState().active = false;
return 0;
}
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < (std::numeric_limits<uint16_t>::max)() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneName );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
return 0;
}
static inline int LuaMessage( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return 0;
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < (std::numeric_limits<uint16_t>::max)() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::Message );
MemWrite( &item->messageFat.time, Profiler::GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyQueueCommit( messageFatThread );
return 0;
}
}
static inline void LuaRegister( lua_State* L )
{
lua_newtable( L );
lua_pushcfunction( L, detail::LuaZoneBegin );
lua_setfield( L, -2, "ZoneBegin" );
lua_pushcfunction( L, detail::LuaZoneBeginN );
lua_setfield( L, -2, "ZoneBeginN" );
#ifdef TRACY_HAS_CALLSTACK
lua_pushcfunction( L, detail::LuaZoneBeginS );
lua_setfield( L, -2, "ZoneBeginS" );
lua_pushcfunction( L, detail::LuaZoneBeginNS );
lua_setfield( L, -2, "ZoneBeginNS" );
#else
lua_pushcfunction( L, detail::LuaZoneBegin );
lua_setfield( L, -2, "ZoneBeginS" );
lua_pushcfunction( L, detail::LuaZoneBeginN );
lua_setfield( L, -2, "ZoneBeginNS" );
#endif
lua_pushcfunction( L, detail::LuaZoneEnd );
lua_setfield( L, -2, "ZoneEnd" );
lua_pushcfunction( L, detail::LuaZoneText );
lua_setfield( L, -2, "ZoneText" );
lua_pushcfunction( L, detail::LuaZoneName );
lua_setfield( L, -2, "ZoneName" );
lua_pushcfunction( L, detail::LuaMessage );
lua_setfield( L, -2, "Message" );
lua_setglobal( L, "tracy" );
}
static inline void LuaRemove( char* script ) {}
static inline void LuaHook( lua_State* L, lua_Debug* ar )
{
if ( ar->event == LUA_HOOKCALL )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if ( zoneCnt != 0 && !GetLuaZoneState().active ) return;
GetLuaZoneState().active = GetProfiler().IsConnected();
if ( !GetLuaZoneState().active ) return;
#endif
lua_getinfo( L, "Snl", ar );
char src[256];
detail::LuaShortenSrc( src, ar->short_src );
const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
else if (ar->event == LUA_HOOKRET) {
#ifdef TRACY_ON_DEMAND
assert( GetLuaZoneState().counter != 0 );
GetLuaZoneState().counter--;
if ( !GetLuaZoneState().active ) return;
if ( !GetProfiler().IsConnected() )
{
GetLuaZoneState().active = false;
return;
}
#endif
TracyQueuePrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
TracyQueueCommit( zoneEndThread );
}
}
}
#endif
#endif

644
tracy/TracyMetal.hmm Normal file
View File

@@ -0,0 +1,644 @@
#ifndef __TRACYMETAL_HMM__
#define __TRACYMETAL_HMM__
/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple
Silicon devices, but it should also work on Intel-based Macs and older iOS devices).
The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
a command encoder is created. This is because not all hardware supports timestamps at
command granularity, and can only provide timestamps around an entire command encoder.
This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone()
will be added to support the habitual command-level granularity of Tracy GPU back-ends.
Metal also imposes a few restrictions that make the process of requesting and collecting
queries more complicated in Tracy:
a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes)
b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes,
and there's no way to reset them back to zero after timestamps get resolved; the only
way to clear the timestamps is by allocating a new timestamp query buffer
c) if a command encoder records no commands and its corresponding command buffer ends up
committed to the command queue, Metal will "optimize-away" the encoder along with any
timestamp queries associated with it (the timestamp will remain as zero and will never
get resolved)
Because of the limitations above, two timestamp buffers are managed internally. Once one
of the buffers fills up with requests, the second buffer can start serving new requests.
Once all requests in a buffer get resolved and collected, the entire buffer is discarded
and a new one allocated for future requests. (Proper cycling through a ring buffer would
require bookkeeping and completion handlers to collect only the known complete queries.)
In the current implementation, there is potential for a race condition when the buffer is
discarded and reallocated. In practice, the race condition will never materialize so long
as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low.
Finally, there's a timeout mechanism during timestamp collection to detect "empty" command
encoders and ensure progress.
*/
#ifndef TRACY_ENABLE
#define TracyMetalContext(device) nullptr
#define TracyMetalDestroy(ctx)
#define TracyMetalContextName(ctx, name, size)
#define TracyMetalZone(ctx, encoderDesc, name)
#define TracyMetalZoneC(ctx, encoderDesc, name, color)
#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active)
#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active)
#define TracyMetalCollect(ctx)
namespace tracy
{
class MetalZoneScope {};
}
using TracyMetalCtx = void;
#else
#if not __has_feature(objc_arc)
#error TracyMetal requires ARC to be enabled.
#endif
#include <atomic>
#include <cassert>
#include <cstdlib>
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
// ok to import if in obj-c code
#import <Metal/Metal.h>
#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__
#define TracyMetalPanic(ret, msg, ...) do { \
char buffer [1024]; \
snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \
TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \
fprintf(stderr, "%s\n", buffer); \
ret; \
} while(false);
#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f
#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#ifndef TRACY_METAL_DEBUG_MASK
#define TRACY_METAL_DEBUG_MASK (0)
#endif//TRACY_METAL_DEBUG_MASK
#if TRACY_METAL_DEBUG_MASK
#define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
#else
#define TracyMetalDebugMasked(mask, ...)
#endif
#if TRACY_METAL_DEBUG_MASK & (1 << 1)
#define TracyMetalDebug_0b00010(...) __VA_ARGS__;
#else
#define TracyMetalDebug_0b00010(...)
#endif
#if TRACY_METAL_DEBUG_MASK & (1 << 4)
#define TracyMetalDebug_0b10000(...) __VA_ARGS__;
#else
#define TracyMetalDebug_0b10000(...)
#endif
#ifndef TracyMetalDebugZoneScopeWireTap
#define TracyMetalDebugZoneScopeWireTap
#endif//TracyMetalDebugZoneScopeWireTap
namespace tracy
{
class MetalCtx
{
friend class MetalZoneScope;
enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_...
public:
static MetalCtx* Create(id<MTLDevice> device)
{
ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4);
auto ctx = static_cast<MetalCtx*>(tracy_malloc(sizeof(MetalCtx)));
new (ctx) MetalCtx(device);
if (ctx->m_contextId == 255)
{
TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context.");
Destroy(ctx);
}
return ctx;
}
static void Destroy(MetalCtx* ctx)
{
ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4);
ctx->~MetalCtx();
tracy_free(ctx);
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
SubmitQueueItem(item);
}
bool Collect()
{
ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4);
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
return true;
}
#endif
// Only one thread is allowed to collect timestamps at any given time
// but there's no need to block contending threads
if (!m_collectionMutex.try_lock())
{
return true;
}
std::unique_lock lock (m_collectionMutex, std::adopt_lock);
uintptr_t begin = m_previousCheckpoint.load();
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
TracyMetalDebugMasked(1<<3, ZoneValue(begin));
TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint));
uint32_t count = RingCount(begin, latestCheckpoint);
if (count == 0) // no pending timestamp queries
{
//uintptr_t nextCheckpoint = m_queryCounter.load();
//if (nextCheckpoint != latestCheckpoint)
//{
// // TODO: signal event / fence now?
//}
return true;
}
// resolve up until the ring buffer boundary and let a subsequenty call
// to Collect handle the wrap-around
bool reallocateBuffer = false;
if (RingIndex(begin) + count >= RingSize())
{
count = RingSize() - RingIndex(begin);
reallocateBuffer = true;
}
TracyMetalDebugMasked(1<<3, ZoneValue(count));
auto buffer_idx = (begin / MaxQueries) % 2;
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
if (count >= RingSize())
{
TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
}
TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count));
NSRange range = NSMakeRange(RingIndex(begin), count);
NSData* data = [counterSampleBuffer resolveCounterRange:range];
NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp);
MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes);
if (timestamps == nil)
{
TracyMetalPanic(return false, "Collect: unable to resolve timestamps.");
}
if (numResolvedTimestamps != count)
{
TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count);
}
int resolved = 0;
for (auto i = 0; i < numResolvedTimestamps; i += 2)
{
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") );
MTLTimestamp t_start = timestamps[i+0].timestamp;
MTLTimestamp t_end = timestamps[i+1].timestamp;
uint32_t k = RingIndex(begin + i);
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start)));
if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue))
{
TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k);
break;
}
// Metal will initialize timestamp buffer with zeroes; encountering a zero-value
// timestamp means that the timestamp has not been written and resolved yet
if ((t_start == 0) || (t_end == 0))
{
auto checkTime = std::chrono::high_resolution_clock::now();
auto requestTime = m_timestampRequestTime[k];
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f;
if (ms_in_flight < timeout_ms)
break;
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") );
TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight);
t_start = m_mostRecentTimestamp + 5;
t_end = t_start + 5;
}
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone"));
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone"));
{
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
{
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k+1));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp;
TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
resolved += 2;
}
TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load())));
m_previousCheckpoint += resolved;
// Check whether the timestamp buffer has been fully resolved/collected:
// WARN: there's technically a race condition here: NextQuery() may reference the
// buffer that is being released instead of the new one. In practice, this should
// never happen so long as Collect is called frequently enough to prevent pending
// timestamp query requests from piling up too quickly.
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
{
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
}
//RecalibrateClocks(); // to account for drift
return true;
}
private:
MetalCtx(id<MTLDevice> device)
: m_device(device)
{
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue));
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample));
if (m_device == nil)
{
TracyMetalPanic({assert(false);} return, "device is nil.");
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
{
TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported.");
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n"));
}
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries);
m_timestampRequestTime.resize(MaxQueries);
MTLTimestamp cpuTimestamp = 0;
MTLTimestamp gpuTimestamp = 0;
[m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp];
m_mostRecentTimestamp = gpuTimestamp;
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp));
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp));
cpuTimestamp = Profiler::GetTime();
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp));
float period = 1.0f;
m_contextId = GetGpuCtxCounter().fetch_add(1);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()?
MemWrite(&item->gpuNewContext.period, period);
MemWrite(&item->gpuNewContext.context, m_contextId);
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
SubmitQueueItem(item);
}
~MetalCtx()
{
// collect the last remnants of Metal GPU activity...
// TODO: add a timeout to this loop?
while (m_previousCheckpoint.load() != m_queryCounter.load())
Collect();
}
tracy_force_inline void SubmitQueueItem(QueueItem* item)
{
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
}
tracy_force_inline uint32_t RingIndex(uintptr_t index)
{
index %= MaxQueries;
return static_cast<uint32_t>(index);
}
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
{
// wrap-around safe: all unsigned
uintptr_t count = end - begin;
return static_cast<uint32_t>(count);
}
tracy_force_inline uint32_t RingSize() const
{
return MaxQueries;
}
struct Query { id<MTLCounterSampleBuffer> buffer; uint32_t idx; };
tracy_force_inline Query NextQuery()
{
TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) );
auto id = m_queryCounter.fetch_add(2);
TracyMetalDebug_0b00010( ZoneValue(id) );
auto count = RingCount(m_previousCheckpoint, id);
if (count >= MaxQueries)
{
// TODO: return a proper (hidden) "sentinel" query
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
TracyMetalPanic(
return sentinel,
"NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)",
m_previousCheckpoint.load(), id, count
);
}
uint32_t buffer_idx = (id / MaxQueries) % 2;
TracyMetalDebug_0b00010( ZoneValue(buffer_idx) );
auto buffer = m_counterSampleBuffers[buffer_idx];
if (buffer == nil)
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
uint32_t idx = RingIndex(id);
TracyMetalDebug_0b00010( ZoneValue(idx) );
TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") );
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
return Query{ buffer, idx };
}
tracy_force_inline uint8_t GetContextId() const
{
return m_contextId;
}
static id<MTLCounterSampleBuffer> NewTimestampSampleBuffer(id<MTLDevice> device, size_t count)
{
ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer");
id<MTLCounterSet> timestampCounterSet = nil;
for (id<MTLCounterSet> counterSet in device.counterSets)
{
if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp])
{
timestampCounterSet = counterSet;
break;
}
}
if (timestampCounterSet == nil)
{
TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform.");
}
MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init];
sampleDescriptor.counterSet = timestampCounterSet;
sampleDescriptor.sampleCount = MaxQueries;
sampleDescriptor.storageMode = MTLStorageModeShared;
sampleDescriptor.label = @"TracyMetalTimestampPool";
NSError* error = nil;
id<MTLCounterSampleBuffer> counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error];
if (error != nil)
{
//NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason);
TracyMetalPanic({assert(false);} return nil,
"ERROR: unable to create sample buffer for timestamp counters : %s | %s",
[error.localizedDescription cString], [error.localizedFailureReason cString]);
}
return counterSampleBuffer;
}
uint8_t m_contextId = 255;
id<MTLDevice> m_device = nil;
id<MTLCounterSampleBuffer> m_counterSampleBuffers [2] = {};
using atomic_counter = std::atomic<uintptr_t>;
static_assert(atomic_counter::is_always_lock_free);
atomic_counter m_queryCounter = 0;
atomic_counter m_previousCheckpoint = 0;
MTLTimestamp m_mostRecentTimestamp = 0;
std::vector<std::chrono::high_resolution_clock::time_point> m_timestampRequestTime;
std::mutex m_collectionMutex;
};
class MetalZoneScope
{
public:
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample;
desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample;
desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
/* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
m_cmdEncoder = cmdEncoder;
auto& query = m_query = ctx->NextQueryId();
[m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES];
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
}
*/
tracy_force_inline ~MetalZoneScope()
{
if( !m_active ) return;
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
}
TracyMetalDebugZoneScopeWireTap;
private:
const bool m_active;
MetalCtx* m_ctx;
/* TODO: declare it for "command-level" profiling
id<MTLComputeCommandEncoder> m_cmdEncoder;
*/
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
{
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
Profiler::QueueSerialFinish();
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
}
static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId)
{
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() );
Profiler::QueueSerialFinish();
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
}
MetalCtx::Query m_query = {};
};
}
using TracyMetalCtx = tracy::MetalCtx;
#define TracyMetalContext(device) tracy::MetalCtx::Create(device)
#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx)
#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size)
#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true )
#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true )
#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
#define TracyMetalCollect( ctx ) ctx->Collect();
#undef TracyMetalDebug_ZoneScopeWireTap
#undef TracyMetalDebug_0b00010
#undef TracyMetalDebug_0b10000
#undef TracyMetalDebugMasked
#undef TRACY_METAL_DEBUG_MASK
#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#undef TracyMetalPanic
#undef TRACY_METAL_VA_ARGS
#endif
#endif//__TRACYMETAL_HMM__

414
tracy/TracyOpenCL.hpp Normal file
View File

@@ -0,0 +1,414 @@
#ifndef __TRACYOPENCL_HPP__
#define __TRACYOPENCL_HPP__
#if !defined TRACY_ENABLE
#define TracyCLContext(c, x) nullptr
#define TracyCLDestroy(c)
#define TracyCLContextName(c, x, y)
#define TracyCLNamedZone(c, x, y, z)
#define TracyCLNamedZoneC(c, x, y, z, w)
#define TracyCLZone(c, x)
#define TracyCLZoneC(c, x, y)
#define TracyCLZoneTransient(c,x,y,z)
#define TracyCLNamedZoneS(c, x, y, z, w)
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
#define TracyCLZoneS(c, x, y)
#define TracyCLZoneCS(c, x, y, z)
#define TracyCLZoneTransientS(c,x,y,z,w)
#define TracyCLNamedZoneSetEvent(x, e)
#define TracyCLZoneSetEvent(e)
#define TracyCLCollect(c)
namespace tracy
{
class OpenCLCtxScope {};
}
using TracyCLCtx = void*;
#else
#include <CL/cl.h>
#include <atomic>
#include <cassert>
#include <sstream>
#include "Tracy.hpp"
#include "../client/TracyCallstack.hpp"
#include "../client/TracyProfiler.hpp"
#include "../common/TracyAlloc.hpp"
#define TRACY_CL_TO_STRING_INDIRECT(T) #T
#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T)
#define TRACY_CL_ASSERT(p) if(!(p)) { \
TracyMessageL( "TRACY_CL_ASSERT failed on " TracyFile ":" TRACY_CL_TO_STRING(TracyLine) ); \
assert(false && "TRACY_CL_ASSERT failed"); \
}
#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \
std::ostringstream oss; \
oss << "TRACY_CL_CHECK_ERROR failed on " << TracyFile << ":" << TracyLine \
<< ": error code " << err; \
auto msg = oss.str(); \
TracyMessage(msg.data(), msg.size()); \
assert(false && "TRACY_CL_CHECK_ERROR failed"); \
}
namespace tracy {
enum class EventPhase : uint8_t
{
Begin,
End
};
struct EventInfo
{
cl_event event;
EventPhase phase;
};
class OpenCLCtx
{
public:
enum { QueryCount = 64 * 1024 };
OpenCLCtx(cl_context context, cl_device_id device)
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
, m_head(0)
, m_tail(0)
{
int64_t tcpu, tgpu;
TRACY_CL_ASSERT(m_contextId != 255);
cl_int err = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
TRACY_CL_CHECK_ERROR(err)
uint32_t dummyValue = 42;
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
TRACY_CL_CHECK_ERROR(err)
cl_event writeBufferEvent;
TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent));
TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent));
tcpu = Profiler::GetTime();
cl_int eventStatus;
TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr));
TRACY_CL_ASSERT(eventStatus == CL_COMPLETE);
TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr));
TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent));
TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer));
TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue));
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, tcpu);
MemWrite(&item->gpuNewContext.gpuTime, tgpu);
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1.0f);
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect()
{
ZoneScopedC(Color::Red4);
if (m_tail == m_head) return;
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
m_head = m_tail = 0;
}
#endif
for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount)
{
EventInfo eventInfo = GetQuery(m_tail);
cl_int eventStatus;
cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
if (err != CL_SUCCESS)
{
std::ostringstream oss;
oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping...";
auto msg = oss.str();
TracyMessage(msg.data(), msg.size());
if (eventInfo.event == nullptr) {
TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!");
}
assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent");
continue;
}
if (eventStatus != CL_COMPLETE) return;
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
? CL_PROFILING_COMMAND_START
: CL_PROFILING_COMMAND_END;
cl_ulong eventTimeStamp = 0;
err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
if (err == CL_PROFILING_INFO_NOT_AVAILABLE)
{
TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!");
assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag");
}
else
TRACY_CL_CHECK_ERROR(err);
TRACY_CL_ASSERT(eventTimeStamp != 0);
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
if (eventInfo.phase == EventPhase::End)
{
// Done with the event, so release it
TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event));
}
}
}
tracy_force_inline uint8_t GetId() const
{
return m_contextId;
}
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
{
const auto id = m_head;
m_head = (m_head + 1) % QueryCount;
TRACY_CL_ASSERT(m_head != m_tail);
m_query[id] = eventInfo;
return id;
}
tracy_force_inline EventInfo& GetQuery(unsigned int id)
{
TRACY_CL_ASSERT(id < QueryCount);
return m_query[id];
}
private:
unsigned int m_contextId;
EventInfo m_query[QueryCount];
unsigned int m_head; // index at which a new event should be inserted
unsigned int m_tail; // oldest event
};
class OpenCLCtxScope {
public:
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active&& GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active&& GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
GetProfiler().SendCallstack(depth);
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active && GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active && GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline void SetEvent(cl_event event)
{
if (!m_active) return;
m_event = event;
TRACY_CL_CHECK_ERROR(clRetainEvent(m_event));
m_ctx->GetQuery(m_beginQueryId).event = m_event;
}
tracy_force_inline ~OpenCLCtxScope()
{
if (!m_active) return;
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
Profiler::QueueSerialFinish();
}
const bool m_active;
OpenCLCtx* m_ctx;
cl_event m_event;
unsigned int m_beginQueryId;
};
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
{
auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx));
new (ctx) OpenCLCtx(context, device);
return ctx;
}
static inline void DestroyCLContext(OpenCLCtx* ctx)
{
ctx->~OpenCLCtx();
tracy_free(ctx);
}
} // namespace tracy
using TracyCLCtx = tracy::OpenCLCtx*;
#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device);
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
#define TracyCLContextName(ctx, name, size) ctx->Name(name, size);
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active );
#else
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active);
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active);
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active );
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active);
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active);
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active );
#else
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active )
#endif
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
#define TracyCLCollect(ctx) ctx->Collect()
#endif
#endif

325
tracy/TracyOpenGL.hpp Normal file
View File

@@ -0,0 +1,325 @@
#ifndef __TRACYOPENGL_HPP__
#define __TRACYOPENGL_HPP__
#if !defined TRACY_ENABLE || defined __APPLE__
#define TracyGpuContext
#define TracyGpuContextName(x,y)
#define TracyGpuNamedZone(x,y,z)
#define TracyGpuNamedZoneC(x,y,z,w)
#define TracyGpuZone(x)
#define TracyGpuZoneC(x,y)
#define TracyGpuZoneTransient(x,y,z)
#define TracyGpuCollect
#define TracyGpuNamedZoneS(x,y,z,w)
#define TracyGpuNamedZoneCS(x,y,z,w,a)
#define TracyGpuZoneS(x,y)
#define TracyGpuZoneCS(x,y,z)
#define TracyGpuZoneTransientS(x,y,z,w)
namespace tracy
{
struct SourceLocationData;
class GpuCtxScope
{
public:
GpuCtxScope( const SourceLocationData*, bool ) {}
GpuCtxScope( const SourceLocationData*, int32_t, bool ) {}
};
}
#else
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT
# define GL_TIMESTAMP GL_TIMESTAMP_EXT
# define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
# define glGetQueryObjectiv glGetQueryObjectivEXT
# define glGetQueryObjectui64v glGetQueryObjectui64vEXT
# define glQueryCounter glQueryCounterEXT
#endif
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active );
#else
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active );
#endif
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
#ifdef TRACY_HAS_CALLSTACK
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active );
#else
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
#endif
namespace tracy
{
class GpuCtx
{
friend class GpuCtxScope;
enum { QueryCount = 64 * 1024 };
public:
GpuCtx()
: m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
{
assert( m_context != 255 );
glGenQueries( QueryCount, m_query );
int64_t tgpu;
glGetInteger64v( GL_TIMESTAMP, &tgpu );
int64_t tcpu = Profiler::GetTime();
GLint bits;
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
const float period = 1.f;
const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
TracyLfqPrepare( QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
}
void Collect()
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
m_head = m_tail = 0;
return;
}
#endif
while( m_tail != m_head )
{
GLint available;
glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
if( !available ) return;
uint64_t time;
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
TracyLfqPrepare( QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
MemWrite( &item->gpuTime.context, m_context );
TracyLfqCommit;
m_tail = ( m_tail + 1 ) % QueryCount;
}
}
private:
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
m_head = ( m_head + 1 ) % QueryCount;
assert( m_head != m_tail );
return id;
}
tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
{
return m_query[id];
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
unsigned int m_query[QueryCount];
uint8_t m_context;
unsigned int m_head;
unsigned int m_tail;
};
class GpuCtxScope
{
public:
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
#ifdef TRACY_FIBERS
TracyLfqPrepare( QueueType::GpuZoneBegin );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
#else
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
#endif
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
#ifdef TRACY_FIBERS
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
#else
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
#endif
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline ~GpuCtxScope()
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
TracyLfqCommit;
}
private:
const bool m_active;
};
}
#endif
#endif

747
tracy/TracyVulkan.hpp Normal file
View File

@@ -0,0 +1,747 @@
#ifndef __TRACYVULKAN_HPP__
#define __TRACYVULKAN_HPP__
#if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#if defined VK_EXT_host_query_reset
#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr
#endif
#define TracyVkDestroy(x)
#define TracyVkContextName(c,x,y)
#define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a)
#define TracyVkZone(c,x,y)
#define TracyVkZoneC(c,x,y,z)
#define TracyVkZoneTransient(c,x,y,z,w)
#define TracyVkCollect(c,x)
#define TracyVkCollectHost(c)
#define TracyVkNamedZoneS(c,x,y,z,w,a)
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
#define TracyVkZoneS(c,x,y,z)
#define TracyVkZoneCS(c,x,y,z,w)
#define TracyVkZoneTransientS(c,x,y,z,w,a)
namespace tracy
{
class VkCtxScope {};
}
using TracyVkCtx = void*;
#else
#if !defined VK_NULL_HANDLE
# error "You must include Vulkan headers before including TracyVulkan.hpp"
#endif
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include <atomic>
namespace tracy
{
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define LoadVkDeviceCoreSymbols(Operation) \
Operation(vkBeginCommandBuffer) \
Operation(vkCmdResetQueryPool) \
Operation(vkCmdWriteTimestamp) \
Operation(vkCreateQueryPool) \
Operation(vkDestroyQueryPool) \
Operation(vkEndCommandBuffer) \
Operation(vkGetQueryPoolResults) \
Operation(vkQueueSubmit) \
Operation(vkQueueWaitIdle) \
Operation(vkResetQueryPool)
#define LoadVkDeviceExtensionSymbols(Operation) \
Operation(vkGetCalibratedTimestampsEXT)
#define LoadVkInstanceExtensionSymbols(Operation) \
Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
#define LoadVkInstanceCoreSymbols(Operation) \
Operation(vkGetPhysicalDeviceProperties)
struct VkSymbolTable
{
#define MAKE_PFN(name) PFN_##name name;
LoadVkDeviceCoreSymbols(MAKE_PFN)
LoadVkDeviceExtensionSymbols(MAKE_PFN)
LoadVkInstanceExtensionSymbols(MAKE_PFN)
LoadVkInstanceCoreSymbols(MAKE_PFN)
#undef MAKE_PFN
};
#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature
#else
#define VK_FUNCTION_WRAPPER(callSignature) callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature
#endif
class VkCtx
{
friend class VkCtxScope;
enum { QueryCount = 64 * 1024 };
public:
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated )
#else
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT)
#endif
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
#if !defined TRACY_VK_USE_SYMBOL_TABLE
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
#endif
{
assert( m_context != 255 );
#if defined TRACY_VK_USE_SYMBOL_TABLE
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
if ( calibrated )
{
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
}
#endif
if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT )
{
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
}
CreateQueryPool();
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmdbuf;
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) );
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
tcpu = Profiler::GetTime();
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) );
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) );
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
}
else
{
FindCalibratedTimestampDeviation();
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
WriteInitialItem( physdev, tcpu, tgpu );
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
}
#if defined VK_EXT_host_query_reset
/**
* This alternative constructor does not use command buffers and instead uses functionality from
* VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires
* the physical device to have another time domain apart from DEVICE to be calibrateable.
*/
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
#else
VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT )
#endif
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
#if !defined TRACY_VK_USE_SYMBOL_TABLE
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
#endif
{
assert( m_context != 255);
#if defined TRACY_VK_USE_SYMBOL_TABLE
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
#endif
assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr );
assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr );
assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr );
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
// We require a host time domain to be available to properly calibrate.
FindCalibratedTimestampDeviation();
int64_t tgpu;
Calibrate( device, m_prevCalibration, tgpu );
int64_t tcpu = Profiler::GetTime();
CreateQueryPool();
VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) );
WriteInitialItem( physdev, tcpu, tgpu );
// We need the buffer to be twice as large for availability values
size_t resSize = sizeof( int64_t ) * m_queryCount * 2;
m_res = (int64_t*)tracy_malloc( resSize );
}
#endif
~VkCtx()
{
tracy_free( m_res );
VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) );
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect( VkCommandBuffer cmdbuf )
{
ZoneScopedC( Color::Red4 );
const uint64_t head = m_head.load(std::memory_order_relaxed);
if( m_tail == head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
cmdbuf ?
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ) :
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, 0, m_queryCount ) );
m_tail = head;
m_oldCnt = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return;
}
#endif
assert( head > m_tail );
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
unsigned int cnt;
if( m_oldCnt != 0 )
{
cnt = m_oldCnt;
m_oldCnt = 0;
}
else
{
cnt = (unsigned int)( head - m_tail );
assert( cnt <= m_queryCount );
if( wrappedTail + cnt > m_queryCount )
{
cnt = m_queryCount - wrappedTail;
}
}
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) );
for( unsigned int idx=0; idx<cnt; idx++ )
{
int64_t avail = m_res[idx * 2 + 1];
if( avail == 0 )
{
m_oldCnt = cnt - idx;
cnt = idx;
break;
}
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] );
MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) );
MemWrite( &item->gpuTime.context, m_context );
Profiler::QueueSerialFinish();
}
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
cmdbuf ?
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ) :
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, wrappedTail, cnt ) );
m_tail += cnt;
}
tracy_force_inline unsigned int NextQueryId()
{
const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed);
return id % m_queryCount;
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
tracy_force_inline VkQueryPool GetQueryPool() const
{
return m_query;
}
private:
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation;
do
{
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
}
while( deviation > m_deviation );
#if defined _WIN32
tGpu = ts[0];
tCpu = ts[1] * m_qpcToNs;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
tGpu = ts[0];
tCpu = ts[1];
#else
assert( false );
#endif
}
tracy_force_inline void CreateQueryPool()
{
VkQueryPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
poolInfo.queryCount = m_queryCount;
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) )
{
m_queryCount /= 2;
poolInfo.queryCount = m_queryCount;
}
}
tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT )
{
uint32_t num;
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr );
if(num > 4) num = 4;
VkTimeDomainEXT data[4];
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data );
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
#if defined _WIN32
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#endif
for( uint32_t i=0; i<num; i++ ) {
if(data[i] == supportedDomain) {
m_timeDomain = data[i];
break;
}
}
}
tracy_force_inline void FindCalibratedTimestampDeviation()
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
constexpr size_t NumProbes = 32;
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( size_t i=0; i<NumProbes; i++ ) {
m_vkGetCalibratedTimestampsEXT( m_device, 2, spec, ts, deviation + i );
}
uint64_t minDeviation = deviation[0];
for( size_t i=1; i<NumProbes; i++ ) {
if ( minDeviation > deviation[i] ) {
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
#if defined _WIN32
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
}
tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu )
{
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
VkPhysicalDeviceProperties prop;
VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) );
const float period = prop.limits.timestampPeriod;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
#if defined TRACY_VK_USE_SYMBOL_TABLE
void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
{
#define VK_GET_DEVICE_SYMBOL( name ) \
(PFN_##name)deviceProcAddr( m_device, #name );
#define VK_LOAD_DEVICE_SYMBOL( name ) \
m_symbols.name = VK_GET_DEVICE_SYMBOL( name );
#define VK_GET_INSTANCE_SYMBOL( name ) \
(PFN_##name)instanceProcAddr( instance, #name );
#define VK_LOAD_INSTANCE_SYMBOL( name ) \
m_symbols.name = VK_GET_INSTANCE_SYMBOL( name );
LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL )
LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL )
LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL )
LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL )
#undef VK_GET_DEVICE_SYMBOL
#undef VK_LOAD_DEVICE_SYMBOL
#undef VK_GET_INSTANCE_SYMBOL
#undef VK_LOAD_INSTANCE_SYMBOL
}
#endif
VkDevice m_device;
VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain;
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkSymbolTable m_symbols;
#endif
uint64_t m_deviation;
#ifdef _WIN32
int64_t m_qpcToNs;
#endif
int64_t m_prevCalibration;
uint8_t m_context;
std::atomic<uint64_t> m_head;
uint64_t m_tail;
unsigned int m_oldCnt;
unsigned int m_queryCount;
int64_t* m_res;
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
};
class VkCtxScope
{
public:
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
QueueItem *item;
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
QueueItem *item;
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline ~VkCtxScope()
{
if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
Profiler::QueueSerialFinish();
}
private:
const bool m_active;
VkCommandBuffer m_cmdbuf;
VkCtx* m_ctx;
};
#if defined TRACY_VK_USE_SYMBOL_TABLE
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false )
#else
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
#endif
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
#if defined TRACY_VK_USE_SYMBOL_TABLE
new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated );
#else
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
#endif
return ctx;
}
#if defined VK_EXT_host_query_reset
#if defined TRACY_VK_USE_SYMBOL_TABLE
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr )
#else
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
#endif
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
#if defined TRACY_VK_USE_SYMBOL_TABLE
new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr );
#else
new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct );
#endif
return ctx;
}
#endif
static inline void DestroyVkContext( VkCtx* ctx )
{
ctx->~VkCtx();
tracy_free( ctx );
}
}
using TracyVkCtx = tracy::VkCtx*;
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr );
#else
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#endif
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true );
#else
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#endif
#if defined VK_EXT_host_query_reset
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr );
#else
#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct );
#endif
#endif
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
#else
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active );
#endif
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
#define TracyVkCollectHost( ctx ) ctx->Collect( VK_NULL_HANDLE );
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active );
#else
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
#endif
#endif
#endif