initial add
This commit is contained in:
278
tracy/Tracy.hpp
Normal file
278
tracy/Tracy.hpp
Normal file
@@ -0,0 +1,278 @@
|
||||
#ifndef __TRACY_HPP__
|
||||
#define __TRACY_HPP__
|
||||
|
||||
#include "../common/TracyColor.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
|
||||
#ifndef TracyFunction
|
||||
# define TracyFunction __FUNCTION__
|
||||
#endif
|
||||
|
||||
#ifndef TracyFile
|
||||
# define TracyFile __FILE__
|
||||
#endif
|
||||
|
||||
#ifndef TracyLine
|
||||
# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665
|
||||
#endif
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyNoop
|
||||
|
||||
#define ZoneNamed(x,y)
|
||||
#define ZoneNamedN(x,y,z)
|
||||
#define ZoneNamedC(x,y,z)
|
||||
#define ZoneNamedNC(x,y,z,w)
|
||||
|
||||
#define ZoneTransient(x,y)
|
||||
#define ZoneTransientN(x,y,z)
|
||||
|
||||
#define ZoneScoped
|
||||
#define ZoneScopedN(x)
|
||||
#define ZoneScopedC(x)
|
||||
#define ZoneScopedNC(x,y)
|
||||
|
||||
#define ZoneText(x,y)
|
||||
#define ZoneTextV(x,y,z)
|
||||
#define ZoneTextF(x,...)
|
||||
#define ZoneTextVF(x,y,...)
|
||||
#define ZoneName(x,y)
|
||||
#define ZoneNameV(x,y,z)
|
||||
#define ZoneNameF(x,...)
|
||||
#define ZoneNameVF(x,y,...)
|
||||
#define ZoneColor(x)
|
||||
#define ZoneColorV(x,y)
|
||||
#define ZoneValue(x)
|
||||
#define ZoneValueV(x,y)
|
||||
#define ZoneIsActive false
|
||||
#define ZoneIsActiveV(x) false
|
||||
|
||||
#define FrameMark
|
||||
#define FrameMarkNamed(x)
|
||||
#define FrameMarkStart(x)
|
||||
#define FrameMarkEnd(x)
|
||||
|
||||
#define FrameImage(x,y,z,w,a)
|
||||
|
||||
#define TracyLockable( type, varname ) type varname
|
||||
#define TracyLockableN( type, varname, desc ) type varname
|
||||
#define TracySharedLockable( type, varname ) type varname
|
||||
#define TracySharedLockableN( type, varname, desc ) type varname
|
||||
#define LockableBase( type ) type
|
||||
#define SharedLockableBase( type ) type
|
||||
#define LockMark(x) (void)x
|
||||
#define LockableName(x,y,z)
|
||||
|
||||
#define TracyPlot(x,y)
|
||||
#define TracyPlotConfig(x,y,z,w,a)
|
||||
|
||||
#define TracyMessage(x,y)
|
||||
#define TracyMessageL(x)
|
||||
#define TracyMessageC(x,y,z)
|
||||
#define TracyMessageLC(x,y)
|
||||
#define TracyAppInfo(x,y)
|
||||
|
||||
#define TracyAlloc(x,y)
|
||||
#define TracyFree(x)
|
||||
#define TracyMemoryDiscard(x)
|
||||
#define TracySecureAlloc(x,y)
|
||||
#define TracySecureFree(x)
|
||||
#define TracySecureMemoryDiscard(x)
|
||||
|
||||
#define TracyAllocN(x,y,z)
|
||||
#define TracyFreeN(x,y)
|
||||
#define TracySecureAllocN(x,y,z)
|
||||
#define TracySecureFreeN(x,y)
|
||||
|
||||
#define ZoneNamedS(x,y,z)
|
||||
#define ZoneNamedNS(x,y,z,w)
|
||||
#define ZoneNamedCS(x,y,z,w)
|
||||
#define ZoneNamedNCS(x,y,z,w,a)
|
||||
|
||||
#define ZoneTransientS(x,y,z)
|
||||
#define ZoneTransientNS(x,y,z,w)
|
||||
|
||||
#define ZoneScopedS(x)
|
||||
#define ZoneScopedNS(x,y)
|
||||
#define ZoneScopedCS(x,y)
|
||||
#define ZoneScopedNCS(x,y,z)
|
||||
|
||||
#define TracyAllocS(x,y,z)
|
||||
#define TracyFreeS(x,y)
|
||||
#define TracyMemoryDiscardS(x,y)
|
||||
#define TracySecureAllocS(x,y,z)
|
||||
#define TracySecureFreeS(x,y)
|
||||
#define TracySecureMemoryDiscardS(x,y)
|
||||
|
||||
#define TracyAllocNS(x,y,z,w)
|
||||
#define TracyFreeNS(x,y,z)
|
||||
#define TracySecureAllocNS(x,y,z,w)
|
||||
#define TracySecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyMessageS(x,y,z)
|
||||
#define TracyMessageLS(x,y)
|
||||
#define TracyMessageCS(x,y,z,w)
|
||||
#define TracyMessageLCS(x,y,z)
|
||||
|
||||
#define TracySourceCallbackRegister(x,y)
|
||||
#define TracyParameterRegister(x,y)
|
||||
#define TracyParameterSetup(x,y,z,w)
|
||||
#define TracyIsConnected false
|
||||
#define TracyIsStarted false
|
||||
#define TracySetProgramName(x)
|
||||
|
||||
#define TracyFiberEnter(x)
|
||||
#define TracyFiberEnterHint(x,y)
|
||||
#define TracyFiberLeave
|
||||
|
||||
#else
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "../client/TracyLock.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyScoped.hpp"
|
||||
|
||||
#ifndef TRACY_CALLSTACK
|
||||
#define TRACY_CALLSTACK 0
|
||||
#endif
|
||||
|
||||
#define TracyNoop tracy::ProfilerAvailable()
|
||||
|
||||
#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
|
||||
#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
|
||||
#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
|
||||
#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
|
||||
|
||||
#if defined(TRACY_ALLOW_SHADOW_WARNING)
|
||||
#define SuppressVarShadowWarning(Expr) Expr
|
||||
#elif defined(__clang__)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Wshadow\"") \
|
||||
Expr; \
|
||||
_Pragma("clang diagnostic pop")
|
||||
#elif defined(__GNUC__)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wshadow\"") \
|
||||
Expr; \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#elif defined(_MSC_VER)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("warning(push)") \
|
||||
_Pragma("warning(disable : 4456)") \
|
||||
Expr; \
|
||||
_Pragma("warning(pop)")
|
||||
#else
|
||||
#define SuppressVarShadowWarning(Expr) Expr
|
||||
#endif
|
||||
|
||||
#define ZoneScoped SuppressVarShadowWarning( ZoneNamed( ___tracy_scoped_zone, true ) )
|
||||
#define ZoneScopedN( name ) SuppressVarShadowWarning( ZoneNamedN( ___tracy_scoped_zone, name, true ) )
|
||||
#define ZoneScopedC( color ) SuppressVarShadowWarning( ZoneNamedC( ___tracy_scoped_zone, color, true ) )
|
||||
#define ZoneScopedNC( name, color ) SuppressVarShadowWarning( ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) )
|
||||
|
||||
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size )
|
||||
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size )
|
||||
#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ )
|
||||
#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ )
|
||||
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size )
|
||||
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size )
|
||||
#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ )
|
||||
#define ZoneNameVF( varname, fmt, ... ) varname.NameFmt( fmt, ##__VA_ARGS__ )
|
||||
#define ZoneColor( color ) ___tracy_scoped_zone.Color( color )
|
||||
#define ZoneColorV( varname, color ) varname.Color( color )
|
||||
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value )
|
||||
#define ZoneValueV( varname, value ) varname.Value( value )
|
||||
#define ZoneIsActive ___tracy_scoped_zone.IsActive()
|
||||
#define ZoneIsActiveV( varname ) varname.IsActive()
|
||||
|
||||
#define FrameMark tracy::Profiler::SendFrameMark( nullptr )
|
||||
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name )
|
||||
#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart )
|
||||
#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd )
|
||||
|
||||
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip )
|
||||
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
|
||||
#define LockableBase( type ) tracy::Lockable<type>
|
||||
#define SharedLockableBase( type ) tracy::SharedLockable<type>
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_lock_location_,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &TracyConcat(__tracy_lock_location_,TracyLine) )
|
||||
#define LockableName( varname, txt, size ) varname.CustomName( txt, size )
|
||||
|
||||
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val )
|
||||
#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color )
|
||||
|
||||
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size )
|
||||
|
||||
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
|
||||
#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
|
||||
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
|
||||
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
|
||||
|
||||
#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
|
||||
#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
|
||||
#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
|
||||
#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
|
||||
|
||||
#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
|
||||
#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
|
||||
#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK )
|
||||
#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
|
||||
#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
|
||||
#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK )
|
||||
|
||||
#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
|
||||
#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
|
||||
#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
|
||||
|
||||
#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
|
||||
#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
|
||||
#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
|
||||
#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
|
||||
#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
|
||||
|
||||
#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
|
||||
#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
|
||||
#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth )
|
||||
#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
|
||||
#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
|
||||
#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth )
|
||||
|
||||
#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
|
||||
#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
|
||||
#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
|
||||
#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
|
||||
|
||||
#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data )
|
||||
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
|
||||
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val )
|
||||
#define TracyIsConnected tracy::GetProfiler().IsConnected()
|
||||
#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 )
|
||||
# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint )
|
||||
# define TracyFiberLeave tracy::Profiler::LeaveFiber()
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
393
tracy/TracyC.h
Normal file
393
tracy/TracyC.h
Normal file
@@ -0,0 +1,393 @@
|
||||
#ifndef __TRACYC_HPP__
|
||||
#define __TRACYC_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum TracyPlotFormatEnum
|
||||
{
|
||||
TracyPlotFormatNumber,
|
||||
TracyPlotFormatMemory,
|
||||
TracyPlotFormatPercentage,
|
||||
TracyPlotFormatWatt
|
||||
};
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name );
|
||||
|
||||
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
|
||||
|
||||
#ifndef TracyFunction
|
||||
# define TracyFunction __FUNCTION__
|
||||
#endif
|
||||
|
||||
#ifndef TracyFile
|
||||
# define TracyFile __FILE__
|
||||
#endif
|
||||
|
||||
#ifndef TracyLine
|
||||
# define TracyLine __LINE__
|
||||
#endif
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
typedef const void* TracyCZoneCtx;
|
||||
|
||||
typedef const void* TracyCLockCtx;
|
||||
|
||||
#define TracyCZone(c,x)
|
||||
#define TracyCZoneN(c,x,y)
|
||||
#define TracyCZoneC(c,x,y)
|
||||
#define TracyCZoneNC(c,x,y,z)
|
||||
#define TracyCZoneEnd(c)
|
||||
#define TracyCZoneText(c,x,y)
|
||||
#define TracyCZoneName(c,x,y)
|
||||
#define TracyCZoneColor(c,x)
|
||||
#define TracyCZoneValue(c,x)
|
||||
|
||||
#define TracyCAlloc(x,y)
|
||||
#define TracyCFree(x)
|
||||
#define TracyCMemoryDiscard(x)
|
||||
#define TracyCSecureAlloc(x,y)
|
||||
#define TracyCSecureFree(x)
|
||||
#define TracyCSecureMemoryDiscard(x)
|
||||
|
||||
#define TracyCAllocN(x,y,z)
|
||||
#define TracyCFreeN(x,y)
|
||||
#define TracyCSecureAllocN(x,y,z)
|
||||
#define TracyCSecureFreeN(x,y)
|
||||
|
||||
#define TracyCFrameMark
|
||||
#define TracyCFrameMarkNamed(x)
|
||||
#define TracyCFrameMarkStart(x)
|
||||
#define TracyCFrameMarkEnd(x)
|
||||
#define TracyCFrameImage(x,y,z,w,a)
|
||||
|
||||
#define TracyCPlot(x,y)
|
||||
#define TracyCPlotF(x,y)
|
||||
#define TracyCPlotI(x,y)
|
||||
#define TracyCPlotConfig(x,y,z,w,a)
|
||||
|
||||
#define TracyCMessage(x,y)
|
||||
#define TracyCMessageL(x)
|
||||
#define TracyCMessageC(x,y,z)
|
||||
#define TracyCMessageLC(x,y)
|
||||
#define TracyCAppInfo(x,y)
|
||||
|
||||
#define TracyCZoneS(x,y,z)
|
||||
#define TracyCZoneNS(x,y,z,w)
|
||||
#define TracyCZoneCS(x,y,z,w)
|
||||
#define TracyCZoneNCS(x,y,z,w,a)
|
||||
|
||||
#define TracyCAllocS(x,y,z)
|
||||
#define TracyCFreeS(x,y)
|
||||
#define TracyCMemoryDiscardS(x,y)
|
||||
#define TracyCSecureAllocS(x,y,z)
|
||||
#define TracyCSecureFreeS(x,y)
|
||||
#define TracyCSecureMemoryDiscardS(x,y)
|
||||
|
||||
#define TracyCAllocNS(x,y,z,w)
|
||||
#define TracyCFreeNS(x,y,z)
|
||||
#define TracyCSecureAllocNS(x,y,z,w)
|
||||
#define TracyCSecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyCMessageS(x,y,z)
|
||||
#define TracyCMessageLS(x,y)
|
||||
#define TracyCMessageCS(x,y,z,w)
|
||||
#define TracyCMessageLCS(x,y,z)
|
||||
|
||||
#define TracyCLockCtx(l)
|
||||
#define TracyCLockAnnounce(l)
|
||||
#define TracyCLockTerminate(l)
|
||||
#define TracyCLockBeforeLock(l)
|
||||
#define TracyCLockAfterLock(l)
|
||||
#define TracyCLockAfterUnlock(l)
|
||||
#define TracyCLockAfterTryLock(l,x)
|
||||
#define TracyCLockMark(l)
|
||||
#define TracyCLockCustomName(l,x,y)
|
||||
|
||||
#define TracyCIsConnected 0
|
||||
#define TracyCIsStarted 0
|
||||
|
||||
#define TracyCBeginSamplingProfiling() 0
|
||||
#define TracyCEndSamplingProfiling()
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyCFiberEnter(fiber)
|
||||
# define TracyCFiberLeave
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifndef TracyConcat
|
||||
# define TracyConcat(x,y) TracyConcatIndirect(x,y)
|
||||
#endif
|
||||
#ifndef TracyConcatIndirect
|
||||
# define TracyConcatIndirect(x,y) x##y
|
||||
#endif
|
||||
|
||||
struct ___tracy_source_location_data
|
||||
{
|
||||
const char* name;
|
||||
const char* function;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t color;
|
||||
};
|
||||
|
||||
struct ___tracy_c_zone_context
|
||||
{
|
||||
uint32_t id;
|
||||
int32_t active;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_time_data
|
||||
{
|
||||
int64_t gpuTime;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_zone_begin_data {
|
||||
uint64_t srcloc;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_zone_begin_callstack_data {
|
||||
uint64_t srcloc;
|
||||
int32_t depth;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_zone_end_data {
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_new_context_data {
|
||||
int64_t gpuTime;
|
||||
float period;
|
||||
uint8_t context;
|
||||
uint8_t flags;
|
||||
uint8_t type;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_context_name_data {
|
||||
uint8_t context;
|
||||
const char* name;
|
||||
uint16_t len;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_calibration_data {
|
||||
int64_t gpuTime;
|
||||
int64_t cpuDelta;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_time_sync_data {
|
||||
int64_t gpuTime;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct __tracy_lockable_context_data;
|
||||
|
||||
// Some containers don't support storing const types.
|
||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||
|
||||
typedef struct __tracy_lockable_context_data* TracyCLockCtx;
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
TRACY_API void ___tracy_startup_profiler(void);
|
||||
TRACY_API void ___tracy_shutdown_profiler(void);
|
||||
TRACY_API int32_t ___tracy_profiler_started(void);
|
||||
|
||||
# define TracyCIsStarted ___tracy_profiler_started()
|
||||
#else
|
||||
# define TracyCIsStarted 1
|
||||
#endif
|
||||
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color );
|
||||
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active );
|
||||
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
|
||||
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
|
||||
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data );
|
||||
TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data );
|
||||
TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data );
|
||||
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data );
|
||||
TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data );
|
||||
TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data );
|
||||
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data );
|
||||
TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data );
|
||||
TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data );
|
||||
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data );
|
||||
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data );
|
||||
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data );
|
||||
|
||||
TRACY_API int32_t ___tracy_connected(void);
|
||||
|
||||
#ifndef TRACY_CALLSTACK
|
||||
#define TRACY_CALLSTACK 0
|
||||
#endif
|
||||
|
||||
#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
|
||||
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
|
||||
|
||||
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
|
||||
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
|
||||
#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color );
|
||||
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth );
|
||||
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth );
|
||||
|
||||
#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK );
|
||||
#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK );
|
||||
|
||||
#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
|
||||
#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
|
||||
#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
|
||||
#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
|
||||
|
||||
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_frame_mark( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip );
|
||||
|
||||
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
|
||||
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
|
||||
#define TracyCFrameMarkStart( name ) ___tracy_emit_frame_mark_start( name );
|
||||
#define TracyCFrameMarkEnd( name ) ___tracy_emit_frame_mark_end( name );
|
||||
#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
|
||||
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
|
||||
TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
#define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val );
|
||||
#define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val );
|
||||
#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color );
|
||||
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
|
||||
|
||||
|
||||
#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
|
||||
#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth )
|
||||
#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth )
|
||||
|
||||
#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
|
||||
#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
|
||||
#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
|
||||
#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
|
||||
|
||||
#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
|
||||
#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
|
||||
|
||||
|
||||
TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc );
|
||||
TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired );
|
||||
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
|
||||
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
|
||||
|
||||
#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) );
|
||||
#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock );
|
||||
#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock );
|
||||
#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock );
|
||||
#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock );
|
||||
#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired );
|
||||
#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) );
|
||||
#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz );
|
||||
|
||||
#define TracyCIsConnected ___tracy_connected()
|
||||
|
||||
TRACY_API int ___tracy_begin_sampling_profiler( void );
|
||||
TRACY_API void ___tracy_end_sampling_profiler( void );
|
||||
|
||||
#define TracyCBeginSamplingProfiling() ___tracy_begin_sampling_profiling()
|
||||
#define TracyCEndSamplingProfiling() ___tracy_end_sampling_profiling()
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TRACY_API void ___tracy_fiber_enter( const char* fiber );
|
||||
TRACY_API void ___tracy_fiber_leave( void );
|
||||
|
||||
# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber );
|
||||
# define TracyCFiberLeave ___tracy_fiber_leave();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1325
tracy/TracyCUDA.hpp
Normal file
1325
tracy/TracyCUDA.hpp
Normal file
File diff suppressed because it is too large
Load Diff
466
tracy/TracyD3D11.hpp
Normal file
466
tracy/TracyD3D11.hpp
Normal file
@@ -0,0 +1,466 @@
|
||||
#ifndef __TRACYD3D11_HPP__
|
||||
#define __TRACYD3D11_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D11Context(device,queue) nullptr
|
||||
#define TracyD3D11Destroy(ctx)
|
||||
#define TracyD3D11ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D11NewFrame(ctx)
|
||||
|
||||
#define TracyD3D11Zone(ctx, name)
|
||||
#define TracyD3D11ZoneC(ctx, name, color)
|
||||
#define TracyD3D11NamedZone(ctx, varname, name, active)
|
||||
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
|
||||
#define TracyD3D11ZoneTransient(ctx, varname, name, active)
|
||||
|
||||
#define TracyD3D11ZoneS(ctx, name, depth)
|
||||
#define TracyD3D11ZoneCS(ctx, name, color, depth)
|
||||
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
|
||||
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
|
||||
#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active)
|
||||
|
||||
#define TracyD3D11Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D11ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D11Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyYield.hpp"
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class D3D11Ctx
|
||||
{
|
||||
friend class D3D11ZoneScope;
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024;
|
||||
|
||||
enum CollectMode { POLL, BLOCK };
|
||||
|
||||
public:
|
||||
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
{
|
||||
// TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument
|
||||
m_device = device;
|
||||
device->AddRef();
|
||||
m_immediateDevCtx = devicectx;
|
||||
devicectx->AddRef();
|
||||
|
||||
{
|
||||
D3D11_QUERY_DESC desc = { };
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery)))
|
||||
{
|
||||
TracyD3D11Panic("unable to create disjoint timestamp query.", return);
|
||||
}
|
||||
}
|
||||
|
||||
for (ID3D11Query*& query : m_queries)
|
||||
{
|
||||
D3D11_QUERY_DESC desc = { };
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
if (FAILED(m_device->CreateQuery(&desc, &query)))
|
||||
{
|
||||
TracyD3D11Panic("unable to create timestamp query.", return);
|
||||
}
|
||||
}
|
||||
|
||||
// Calibrate CPU and GPU timestamps
|
||||
int64_t tcpu = 0;
|
||||
int64_t tgpu = 0;
|
||||
for (int attempts = 0; attempts < 50; attempts++)
|
||||
{
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_immediateDevCtx->End(m_queries[0]);
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
|
||||
int64_t tcpu0 = Profiler::GetTime();
|
||||
WaitForQuery(m_disjointQuery);
|
||||
// NOTE: one would expect that by waiting for the enclosing disjoint query to finish,
|
||||
// all timestamp queries within would also be readily available, but that does not
|
||||
// seem to be the case here... See https://github.com/wolfpld/tracy/issues/947
|
||||
WaitForQuery(m_queries[0]);
|
||||
int64_t tcpu1 = Profiler::GetTime();
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
|
||||
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK)
|
||||
{
|
||||
TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (disjoint.Disjoint)
|
||||
continue;
|
||||
|
||||
UINT64 timestamp = 0;
|
||||
if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK)
|
||||
continue; // this should never happen (we waited for the query to finish above)
|
||||
|
||||
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
|
||||
tgpu = timestamp * (1000000000 / disjoint.Frequency);
|
||||
break;
|
||||
}
|
||||
|
||||
// ready to roll
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_previousCheckpoint = m_nextCheckpoint = 0;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()?
|
||||
MemWrite( &item->gpuNewContext.period, 1.0f );
|
||||
MemWrite( &item->gpuNewContext.context, m_contextId);
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
~D3D11Ctx()
|
||||
{
|
||||
// collect all pending timestamps before destroying everything
|
||||
do
|
||||
{
|
||||
Collect(BLOCK);
|
||||
} while (m_previousCheckpoint != m_queryCounter);
|
||||
|
||||
for (ID3D11Query* query : m_queries)
|
||||
{
|
||||
query->Release();
|
||||
}
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
m_disjointQuery->Release();
|
||||
m_immediateDevCtx->Release();
|
||||
m_device->Release();
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect(CollectMode mode = POLL)
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_previousCheckpoint = m_nextCheckpoint = m_queryCounter;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (m_previousCheckpoint == m_nextCheckpoint)
|
||||
{
|
||||
uintptr_t nextCheckpoint = m_queryCounter;
|
||||
if (nextCheckpoint == m_nextCheckpoint)
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_nextCheckpoint = nextCheckpoint;
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
}
|
||||
|
||||
if (mode == CollectMode::BLOCK)
|
||||
{
|
||||
WaitForQuery(m_disjointQuery);
|
||||
}
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
|
||||
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (disjoint.Disjoint == TRUE)
|
||||
{
|
||||
m_previousCheckpoint = m_nextCheckpoint;
|
||||
TracyD3D11Panic("disjoint timestamps detected; dropping.");
|
||||
return;
|
||||
}
|
||||
|
||||
auto begin = m_previousCheckpoint;
|
||||
auto end = m_nextCheckpoint;
|
||||
for (auto i = begin; i != end; ++i)
|
||||
{
|
||||
uint32_t k = RingIndex(i);
|
||||
UINT64 timestamp = 0;
|
||||
if (m_immediateDevCtx->GetData(m_queries[k], ×tamp, sizeof(timestamp), 0) != S_OK)
|
||||
{
|
||||
TracyD3D11Panic("timestamp expected to be ready, but it was not!");
|
||||
break;
|
||||
}
|
||||
timestamp *= (1000000000ull / disjoint.Frequency);
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(timestamp));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// disjoint timestamp queries should only be invoked once per frame or less
|
||||
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_previousCheckpoint = m_nextCheckpoint;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
||||
{
|
||||
index %= MaxQueries;
|
||||
return static_cast<uint32_t>(index);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
|
||||
{
|
||||
// wrap-around safe: all unsigned
|
||||
uintptr_t count = end - begin;
|
||||
return static_cast<uint32_t>(count);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t NextQueryId()
|
||||
{
|
||||
auto id = m_queryCounter++;
|
||||
if (RingCount(m_previousCheckpoint, id) >= MaxQueries)
|
||||
{
|
||||
TracyD3D11Panic("too many pending timestamp queries.");
|
||||
// #TODO: return some sentinel value; ideally a "hidden" query index
|
||||
}
|
||||
return RingIndex(id);
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id)
|
||||
{
|
||||
return m_queries[id];
|
||||
}
|
||||
|
||||
tracy_force_inline void WaitForQuery(ID3D11Query* query)
|
||||
{
|
||||
m_immediateDevCtx->Flush();
|
||||
while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK)
|
||||
YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends...
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetContextId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
ID3D11Device* m_device = nullptr;
|
||||
ID3D11DeviceContext* m_immediateDevCtx = nullptr;
|
||||
|
||||
ID3D11Query* m_queries[MaxQueries];
|
||||
ID3D11Query* m_disjointQuery = nullptr;
|
||||
|
||||
uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere?
|
||||
|
||||
uintptr_t m_queryCounter = 0;
|
||||
|
||||
uintptr_t m_previousCheckpoint = 0;
|
||||
uintptr_t m_nextCheckpoint = 0;
|
||||
};
|
||||
|
||||
class D3D11ZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active )
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active )
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active)
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
if ( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D11ZoneScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId));
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
}
|
||||
|
||||
void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation)
|
||||
{
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId));
|
||||
|
||||
MemWrite( &item->hdr.type, queueItemType);
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
const bool m_active;
|
||||
|
||||
D3D11Ctx* m_ctx;
|
||||
};
|
||||
|
||||
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
{
|
||||
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
|
||||
new(ctx) D3D11Ctx( device, devicectx );
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
|
||||
{
|
||||
ctx->~D3D11Ctx();
|
||||
tracy_free( ctx );
|
||||
}
|
||||
}
|
||||
|
||||
#undef TracyD3D11Panic
|
||||
|
||||
using TracyD3D11Ctx = tracy::D3D11Ctx*;
|
||||
|
||||
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
|
||||
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
|
||||
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#define TracyD3D11UnnamedZone ___tracy_gpu_d3d11_zone
|
||||
#define TracyD3D11SrcLocSymbol TracyConcat(__tracy_gpu_d3d11_source_location,TracyLine)
|
||||
#define TracyD3D11SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D11SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, TracyD3D11UnnamedZone, name, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, TracyD3D11UnnamedZone, name, color, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, depth, true )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, depth, true )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), depth, active };
|
||||
#else
|
||||
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D11Collect( ctx ) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
500
tracy/TracyD3D12.hpp
Normal file
500
tracy/TracyD3D12.hpp
Normal file
@@ -0,0 +1,500 @@
|
||||
#ifndef __TRACYD3D12_HPP__
|
||||
#define __TRACYD3D12_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D12Context(device, queue) nullptr
|
||||
#define TracyD3D12Destroy(ctx)
|
||||
#define TracyD3D12ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D12NewFrame(ctx)
|
||||
|
||||
#define TracyD3D12Zone(ctx, cmdList, name)
|
||||
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
|
||||
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
|
||||
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
|
||||
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
|
||||
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
|
||||
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
|
||||
|
||||
#define TracyD3D12Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D12ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
#include <queue>
|
||||
|
||||
#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct D3D12QueryPayload
|
||||
{
|
||||
uint32_t m_queryIdStart = 0;
|
||||
uint32_t m_queryCount = 0;
|
||||
};
|
||||
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
friend class D3D12ZoneScope;
|
||||
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere?
|
||||
ID3D12QueryHeap* m_queryHeap = nullptr;
|
||||
ID3D12Resource* m_readbackBuffer = nullptr;
|
||||
|
||||
// In-progress payload.
|
||||
uint32_t m_queryLimit = 0;
|
||||
std::atomic<uint32_t> m_queryCounter = 0;
|
||||
uint32_t m_previousQueryCounter = 0;
|
||||
|
||||
uint32_t m_activePayload = 0;
|
||||
ID3D12Fence* m_payloadFence = nullptr;
|
||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
|
||||
UINT64 m_prevCalibrationTicksCPU = 0;
|
||||
|
||||
void RecalibrateClocks()
|
||||
{
|
||||
UINT64 cpuTimestamp;
|
||||
UINT64 gpuTimestamp;
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
TracyD3D12Panic("failed to obtain queue clock calibration counters.", return);
|
||||
}
|
||||
|
||||
int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU;
|
||||
if (cpuDeltaTicks > 0)
|
||||
{
|
||||
static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc();
|
||||
int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick;
|
||||
// Save the device cpu timestamp, not the Tracy profiler timestamp:
|
||||
m_prevCalibrationTicksCPU = cpuTimestamp;
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS);
|
||||
MemWrite(&item->gpuCalibration.context, GetId());
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
public:
|
||||
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
: m_device(device)
|
||||
, m_queue(queue)
|
||||
{
|
||||
// Verify we support timestamp queries on this queue.
|
||||
|
||||
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
|
||||
|
||||
HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData));
|
||||
if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE))
|
||||
{
|
||||
TracyD3D12Panic("Platform does not support profiling of copy queues.", return);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs
|
||||
m_queryLimit = MaxQueries;
|
||||
|
||||
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
|
||||
{
|
||||
m_queryLimit /= 2;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
}
|
||||
|
||||
// Create a readback buffer, which will be used as a destination for the query data.
|
||||
|
||||
D3D12_RESOURCE_DESC readbackBufferDesc{};
|
||||
readbackBufferDesc.Alignment = 0;
|
||||
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
|
||||
readbackBufferDesc.Height = 1;
|
||||
readbackBufferDesc.DepthOrArraySize = 1;
|
||||
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
|
||||
readbackBufferDesc.MipLevels = 1;
|
||||
readbackBufferDesc.SampleDesc.Count = 1;
|
||||
readbackBufferDesc.SampleDesc.Quality = 0;
|
||||
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
|
||||
D3D12_HEAP_PROPERTIES readbackHeapProps{};
|
||||
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
readbackHeapProps.CreationNodeMask = 0;
|
||||
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
|
||||
{
|
||||
TracyD3D12Panic("Failed to create query readback buffer.", return);
|
||||
}
|
||||
|
||||
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
|
||||
{
|
||||
TracyD3D12Panic("Failed to create payload fence.", return);
|
||||
}
|
||||
|
||||
float period = [queue]()
|
||||
{
|
||||
uint64_t timestampFrequency;
|
||||
if (FAILED(queue->GetTimestampFrequency(×tampFrequency)))
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
return static_cast<float>( 1E+09 / static_cast<double>(timestampFrequency) );
|
||||
}();
|
||||
|
||||
if (period == 0.0f)
|
||||
{
|
||||
TracyD3D12Panic("Failed to get timestamp frequency.", return);
|
||||
}
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
TracyD3D12Panic("Failed to get queue clock calibration.", return);
|
||||
}
|
||||
|
||||
// Save the device cpu timestamp, not the profiler's timestamp.
|
||||
m_prevCalibrationTicksCPU = cpuTimestamp;
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
// all checked: ready to roll
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()?
|
||||
MemWrite(&item->gpuNewContext.period, period);
|
||||
MemWrite(&item->gpuNewContext.context, GetId());
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
~D3D12QueueCtx()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
// collect all pending timestamps
|
||||
while (m_payloadFence->GetCompletedValue() != m_activePayload)
|
||||
/* busy-wait ... */;
|
||||
Collect();
|
||||
m_payloadFence->Release();
|
||||
m_readbackBuffer->Release();
|
||||
m_queryHeap->Release();
|
||||
}
|
||||
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
uint32_t queryCounter = m_queryCounter.exchange(0);
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter });
|
||||
m_previousQueryCounter += queryCounter;
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
|
||||
m_queue->Signal(m_payloadFence, ++m_activePayload);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, GetId());
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
TracyD3D12Panic("Failed to map readback buffer.", return);
|
||||
}
|
||||
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
RecalibrateClocks();
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline uint32_t NextQueryId()
|
||||
{
|
||||
uint32_t queryCounter = m_queryCounter.fetch_add(2);
|
||||
if (queryCounter >= m_queryLimit)
|
||||
{
|
||||
TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries.");
|
||||
// #TODO: consider returning an invalid id or sentinel value here
|
||||
}
|
||||
|
||||
const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit;
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation)
|
||||
{
|
||||
MemWrite(&item->hdr.type, type);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = m_ctx->NextQueryId();
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
}
|
||||
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcLocation));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D12ZoneScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t));
|
||||
}
|
||||
};
|
||||
|
||||
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
{
|
||||
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
|
||||
new (ctx) D3D12QueueCtx{ device, queue };
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
|
||||
{
|
||||
ctx->~D3D12QueueCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#undef TracyD3D12Panic
|
||||
|
||||
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||
|
||||
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
|
||||
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
|
||||
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
|
||||
|
||||
#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone
|
||||
#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine)
|
||||
#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active };
|
||||
#else
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D12Collect(ctx) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
486
tracy/TracyLua.hpp
Normal file
486
tracy/TracyLua.hpp
Normal file
@@ -0,0 +1,486 @@
|
||||
#ifndef __TRACYLUA_HPP__
|
||||
#define __TRACYLUA_HPP__
|
||||
|
||||
// Include this file after you include lua headers.
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
static inline int noop( lua_State* L ) { return 0; }
|
||||
}
|
||||
|
||||
static inline void LuaRegister( lua_State* L )
|
||||
{
|
||||
lua_newtable( L );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBegin" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginN" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneEnd" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneText" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneName" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "Message" );
|
||||
lua_setglobal( L, "tracy" );
|
||||
}
|
||||
|
||||
static inline char* FindEnd( char* ptr )
|
||||
{
|
||||
unsigned int cnt = 1;
|
||||
while( cnt != 0 )
|
||||
{
|
||||
if( *ptr == '(' ) cnt++;
|
||||
else if( *ptr == ')' ) cnt--;
|
||||
ptr++;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void LuaRemove( char* script )
|
||||
{
|
||||
while( *script )
|
||||
{
|
||||
if( strncmp( script, "tracy.", 6 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 6, "Zone", 4 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 10, "End()", 5 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 15 );
|
||||
script += 15;
|
||||
}
|
||||
else if( strncmp( script + 10, "Begin()", 7 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 17 );
|
||||
script += 17;
|
||||
}
|
||||
else if( strncmp( script + 10, "Text(", 5 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 15 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "Name(", 5 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 15 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginN(", 7 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 17 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginS(", 7 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 17 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginNS(", 8 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 18 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 10;
|
||||
}
|
||||
}
|
||||
else if( strncmp( script + 6, "Message(", 8 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 14 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 6;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
script++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void LuaHook( lua_State* L, lua_Debug* ar ) {}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
|
||||
#include "../common/TracyColor.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
TRACY_API LuaZoneState& GetLuaZoneState();
|
||||
#endif
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
static inline void LuaShortenSrc( char* dst, const char* src )
|
||||
{
|
||||
size_t l = std::min( (size_t)255, strlen( src ) );
|
||||
memcpy( dst, src, l );
|
||||
dst[l] = 0;
|
||||
}
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
{
|
||||
assert( depth <= 64 );
|
||||
lua_Debug dbg[64];
|
||||
const char* func[64];
|
||||
uint32_t fsz[64];
|
||||
uint32_t ssz[64];
|
||||
|
||||
uint8_t cnt;
|
||||
uint16_t spaceNeeded = sizeof( cnt );
|
||||
for( cnt=0; cnt<depth; cnt++ )
|
||||
{
|
||||
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
|
||||
lua_getinfo( L, "Snl", dbg+cnt );
|
||||
func[cnt] = dbg[cnt].name ? dbg[cnt].name : dbg[cnt].short_src;
|
||||
fsz[cnt] = uint32_t( strlen( func[cnt] ) );
|
||||
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
|
||||
spaceNeeded += fsz[cnt] + ssz[cnt];
|
||||
}
|
||||
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
|
||||
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
|
||||
auto dst = ptr;
|
||||
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
|
||||
memcpy( dst, &cnt, 1 ); dst++;
|
||||
for( uint8_t i=0; i<cnt; i++ )
|
||||
{
|
||||
const uint32_t line = dbg[i].currentline;
|
||||
memcpy( dst, &line, 4 ); dst += 4;
|
||||
assert( fsz[i] <= (std::numeric_limits<uint16_t>::max)() );
|
||||
memcpy( dst, fsz+i, 2 ); dst += 2;
|
||||
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
|
||||
assert( ssz[i] <= (std::numeric_limits<uint16_t>::max)() );
|
||||
memcpy( dst, ssz+i, 2 ); dst += 2;
|
||||
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
|
||||
}
|
||||
assert( dst - ptr == spaceNeeded + 2 );
|
||||
|
||||
TracyQueuePrepare( QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
TracyQueueCommit( callstackAllocFatThread );
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginS( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 1 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
char src[256];
|
||||
LuaShortenSrc( src, dbg.source );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginNS( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 2 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
size_t nsz;
|
||||
char src[256];
|
||||
LuaShortenSrc( src, dbg.source );
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int LuaZoneBegin( lua_State* L )
|
||||
{
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
return LuaZoneBeginS( L );
|
||||
#else
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
char src[256];
|
||||
LuaShortenSrc( src, dbg.source );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginN( lua_State* L )
|
||||
{
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
return LuaZoneBeginNS( L );
|
||||
#else
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
size_t nsz;
|
||||
char src[256];
|
||||
LuaShortenSrc( src, dbg.source );
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int LuaZoneEnd( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
assert( GetLuaZoneState().counter != 0 );
|
||||
GetLuaZoneState().counter--;
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyQueueCommit( zoneEndThread );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneText( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneName( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaMessage( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return 0;
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::Message );
|
||||
MemWrite( &item->messageFat.time, Profiler::GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageFatThread );
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static inline void LuaRegister( lua_State* L )
|
||||
{
|
||||
lua_newtable( L );
|
||||
lua_pushcfunction( L, detail::LuaZoneBegin );
|
||||
lua_setfield( L, -2, "ZoneBegin" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginN );
|
||||
lua_setfield( L, -2, "ZoneBeginN" );
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginS );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginNS );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
#else
|
||||
lua_pushcfunction( L, detail::LuaZoneBegin );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginN );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
#endif
|
||||
lua_pushcfunction( L, detail::LuaZoneEnd );
|
||||
lua_setfield( L, -2, "ZoneEnd" );
|
||||
lua_pushcfunction( L, detail::LuaZoneText );
|
||||
lua_setfield( L, -2, "ZoneText" );
|
||||
lua_pushcfunction( L, detail::LuaZoneName );
|
||||
lua_setfield( L, -2, "ZoneName" );
|
||||
lua_pushcfunction( L, detail::LuaMessage );
|
||||
lua_setfield( L, -2, "Message" );
|
||||
lua_setglobal( L, "tracy" );
|
||||
}
|
||||
|
||||
static inline void LuaRemove( char* script ) {}
|
||||
|
||||
static inline void LuaHook( lua_State* L, lua_Debug* ar )
|
||||
{
|
||||
if ( ar->event == LUA_HOOKCALL )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if ( zoneCnt != 0 && !GetLuaZoneState().active ) return;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if ( !GetLuaZoneState().active ) return;
|
||||
#endif
|
||||
lua_getinfo( L, "Snl", ar );
|
||||
|
||||
char src[256];
|
||||
detail::LuaShortenSrc( src, ar->short_src );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src );
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
else if (ar->event == LUA_HOOKRET) {
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
assert( GetLuaZoneState().counter != 0 );
|
||||
GetLuaZoneState().counter--;
|
||||
if ( !GetLuaZoneState().active ) return;
|
||||
if ( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyQueueCommit( zoneEndThread );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
644
tracy/TracyMetal.hmm
Normal file
644
tracy/TracyMetal.hmm
Normal file
@@ -0,0 +1,644 @@
|
||||
#ifndef __TRACYMETAL_HMM__
|
||||
#define __TRACYMETAL_HMM__
|
||||
|
||||
/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple
|
||||
Silicon devices, but it should also work on Intel-based Macs and older iOS devices).
|
||||
The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
|
||||
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
|
||||
a command encoder is created. This is because not all hardware supports timestamps at
|
||||
command granularity, and can only provide timestamps around an entire command encoder.
|
||||
This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone()
|
||||
will be added to support the habitual command-level granularity of Tracy GPU back-ends.
|
||||
Metal also imposes a few restrictions that make the process of requesting and collecting
|
||||
queries more complicated in Tracy:
|
||||
a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes)
|
||||
b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes,
|
||||
and there's no way to reset them back to zero after timestamps get resolved; the only
|
||||
way to clear the timestamps is by allocating a new timestamp query buffer
|
||||
c) if a command encoder records no commands and its corresponding command buffer ends up
|
||||
committed to the command queue, Metal will "optimize-away" the encoder along with any
|
||||
timestamp queries associated with it (the timestamp will remain as zero and will never
|
||||
get resolved)
|
||||
Because of the limitations above, two timestamp buffers are managed internally. Once one
|
||||
of the buffers fills up with requests, the second buffer can start serving new requests.
|
||||
Once all requests in a buffer get resolved and collected, the entire buffer is discarded
|
||||
and a new one allocated for future requests. (Proper cycling through a ring buffer would
|
||||
require bookkeeping and completion handlers to collect only the known complete queries.)
|
||||
In the current implementation, there is potential for a race condition when the buffer is
|
||||
discarded and reallocated. In practice, the race condition will never materialize so long
|
||||
as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low.
|
||||
Finally, there's a timeout mechanism during timestamp collection to detect "empty" command
|
||||
encoders and ensure progress.
|
||||
*/
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyMetalContext(device) nullptr
|
||||
#define TracyMetalDestroy(ctx)
|
||||
#define TracyMetalContextName(ctx, name, size)
|
||||
|
||||
#define TracyMetalZone(ctx, encoderDesc, name)
|
||||
#define TracyMetalZoneC(ctx, encoderDesc, name, color)
|
||||
#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active)
|
||||
#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active)
|
||||
|
||||
#define TracyMetalCollect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class MetalZoneScope {};
|
||||
}
|
||||
|
||||
using TracyMetalCtx = void;
|
||||
|
||||
#else
|
||||
|
||||
#if not __has_feature(objc_arc)
|
||||
#error TracyMetal requires ARC to be enabled.
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
// ok to import if in obj-c code
|
||||
#import <Metal/Metal.h>
|
||||
|
||||
#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__
|
||||
|
||||
#define TracyMetalPanic(ret, msg, ...) do { \
|
||||
char buffer [1024]; \
|
||||
snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \
|
||||
TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \
|
||||
fprintf(stderr, "%s\n", buffer); \
|
||||
ret; \
|
||||
} while(false);
|
||||
|
||||
#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f
|
||||
#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
|
||||
#ifndef TRACY_METAL_DEBUG_MASK
|
||||
#define TRACY_METAL_DEBUG_MASK (0)
|
||||
#endif//TRACY_METAL_DEBUG_MASK
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK
|
||||
#define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
|
||||
#else
|
||||
#define TracyMetalDebugMasked(mask, ...)
|
||||
#endif
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK & (1 << 1)
|
||||
#define TracyMetalDebug_0b00010(...) __VA_ARGS__;
|
||||
#else
|
||||
#define TracyMetalDebug_0b00010(...)
|
||||
#endif
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK & (1 << 4)
|
||||
#define TracyMetalDebug_0b10000(...) __VA_ARGS__;
|
||||
#else
|
||||
#define TracyMetalDebug_0b10000(...)
|
||||
#endif
|
||||
|
||||
#ifndef TracyMetalDebugZoneScopeWireTap
|
||||
#define TracyMetalDebugZoneScopeWireTap
|
||||
#endif//TracyMetalDebugZoneScopeWireTap
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class MetalCtx
|
||||
{
|
||||
friend class MetalZoneScope;
|
||||
|
||||
enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_...
|
||||
|
||||
public:
|
||||
static MetalCtx* Create(id<MTLDevice> device)
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4);
|
||||
auto ctx = static_cast<MetalCtx*>(tracy_malloc(sizeof(MetalCtx)));
|
||||
new (ctx) MetalCtx(device);
|
||||
if (ctx->m_contextId == 255)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context.");
|
||||
Destroy(ctx);
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static void Destroy(MetalCtx* ctx)
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4);
|
||||
ctx->~MetalCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
bool Collect()
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Only one thread is allowed to collect timestamps at any given time
|
||||
// but there's no need to block contending threads
|
||||
if (!m_collectionMutex.try_lock())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_lock lock (m_collectionMutex, std::adopt_lock);
|
||||
|
||||
uintptr_t begin = m_previousCheckpoint.load();
|
||||
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(begin));
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint));
|
||||
|
||||
uint32_t count = RingCount(begin, latestCheckpoint);
|
||||
if (count == 0) // no pending timestamp queries
|
||||
{
|
||||
//uintptr_t nextCheckpoint = m_queryCounter.load();
|
||||
//if (nextCheckpoint != latestCheckpoint)
|
||||
//{
|
||||
// // TODO: signal event / fence now?
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
// resolve up until the ring buffer boundary and let a subsequenty call
|
||||
// to Collect handle the wrap-around
|
||||
bool reallocateBuffer = false;
|
||||
if (RingIndex(begin) + count >= RingSize())
|
||||
{
|
||||
count = RingSize() - RingIndex(begin);
|
||||
reallocateBuffer = true;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(count));
|
||||
|
||||
auto buffer_idx = (begin / MaxQueries) % 2;
|
||||
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
|
||||
|
||||
if (count >= RingSize())
|
||||
{
|
||||
TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
|
||||
}
|
||||
|
||||
TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count));
|
||||
|
||||
NSRange range = NSMakeRange(RingIndex(begin), count);
|
||||
NSData* data = [counterSampleBuffer resolveCounterRange:range];
|
||||
NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp);
|
||||
MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes);
|
||||
if (timestamps == nil)
|
||||
{
|
||||
TracyMetalPanic(return false, "Collect: unable to resolve timestamps.");
|
||||
}
|
||||
|
||||
if (numResolvedTimestamps != count)
|
||||
{
|
||||
TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count);
|
||||
}
|
||||
|
||||
int resolved = 0;
|
||||
for (auto i = 0; i < numResolvedTimestamps; i += 2)
|
||||
{
|
||||
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") );
|
||||
MTLTimestamp t_start = timestamps[i+0].timestamp;
|
||||
MTLTimestamp t_end = timestamps[i+1].timestamp;
|
||||
uint32_t k = RingIndex(begin + i);
|
||||
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start)));
|
||||
if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue))
|
||||
{
|
||||
TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k);
|
||||
break;
|
||||
}
|
||||
// Metal will initialize timestamp buffer with zeroes; encountering a zero-value
|
||||
// timestamp means that the timestamp has not been written and resolved yet
|
||||
if ((t_start == 0) || (t_end == 0))
|
||||
{
|
||||
auto checkTime = std::chrono::high_resolution_clock::now();
|
||||
auto requestTime = m_timestampRequestTime[k];
|
||||
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
|
||||
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
|
||||
const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f;
|
||||
if (ms_in_flight < timeout_ms)
|
||||
break;
|
||||
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") );
|
||||
TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight);
|
||||
t_start = m_mostRecentTimestamp + 5;
|
||||
t_end = t_start + 5;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone"));
|
||||
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone"));
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k+1));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp;
|
||||
TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
|
||||
resolved += 2;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load())));
|
||||
|
||||
m_previousCheckpoint += resolved;
|
||||
|
||||
// Check whether the timestamp buffer has been fully resolved/collected:
|
||||
// WARN: there's technically a race condition here: NextQuery() may reference the
|
||||
// buffer that is being released instead of the new one. In practice, this should
|
||||
// never happen so long as Collect is called frequently enough to prevent pending
|
||||
// timestamp query requests from piling up too quickly.
|
||||
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
|
||||
{
|
||||
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
}
|
||||
|
||||
//RecalibrateClocks(); // to account for drift
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
MetalCtx(id<MTLDevice> device)
|
||||
: m_device(device)
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue));
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample));
|
||||
|
||||
if (m_device == nil)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return, "device is nil.");
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported.");
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n"));
|
||||
}
|
||||
|
||||
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
|
||||
m_timestampRequestTime.resize(MaxQueries);
|
||||
|
||||
MTLTimestamp cpuTimestamp = 0;
|
||||
MTLTimestamp gpuTimestamp = 0;
|
||||
[m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp];
|
||||
m_mostRecentTimestamp = gpuTimestamp;
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp));
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp));
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp));
|
||||
|
||||
float period = 1.0f;
|
||||
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
|
||||
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
|
||||
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()?
|
||||
MemWrite(&item->gpuNewContext.period, period);
|
||||
MemWrite(&item->gpuNewContext.context, m_contextId);
|
||||
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
~MetalCtx()
|
||||
{
|
||||
// collect the last remnants of Metal GPU activity...
|
||||
// TODO: add a timeout to this loop?
|
||||
while (m_previousCheckpoint.load() != m_queryCounter.load())
|
||||
Collect();
|
||||
}
|
||||
|
||||
tracy_force_inline void SubmitQueueItem(QueueItem* item)
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
||||
{
|
||||
index %= MaxQueries;
|
||||
return static_cast<uint32_t>(index);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
|
||||
{
|
||||
// wrap-around safe: all unsigned
|
||||
uintptr_t count = end - begin;
|
||||
return static_cast<uint32_t>(count);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingSize() const
|
||||
{
|
||||
return MaxQueries;
|
||||
}
|
||||
|
||||
struct Query { id<MTLCounterSampleBuffer> buffer; uint32_t idx; };
|
||||
|
||||
tracy_force_inline Query NextQuery()
|
||||
{
|
||||
TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) );
|
||||
auto id = m_queryCounter.fetch_add(2);
|
||||
TracyMetalDebug_0b00010( ZoneValue(id) );
|
||||
auto count = RingCount(m_previousCheckpoint, id);
|
||||
if (count >= MaxQueries)
|
||||
{
|
||||
// TODO: return a proper (hidden) "sentinel" query
|
||||
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
|
||||
TracyMetalPanic(
|
||||
return sentinel,
|
||||
"NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)",
|
||||
m_previousCheckpoint.load(), id, count
|
||||
);
|
||||
}
|
||||
uint32_t buffer_idx = (id / MaxQueries) % 2;
|
||||
TracyMetalDebug_0b00010( ZoneValue(buffer_idx) );
|
||||
auto buffer = m_counterSampleBuffers[buffer_idx];
|
||||
if (buffer == nil)
|
||||
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
|
||||
uint32_t idx = RingIndex(id);
|
||||
TracyMetalDebug_0b00010( ZoneValue(idx) );
|
||||
TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") );
|
||||
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
|
||||
return Query{ buffer, idx };
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetContextId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
static id<MTLCounterSampleBuffer> NewTimestampSampleBuffer(id<MTLDevice> device, size_t count)
|
||||
{
|
||||
ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer");
|
||||
|
||||
id<MTLCounterSet> timestampCounterSet = nil;
|
||||
for (id<MTLCounterSet> counterSet in device.counterSets)
|
||||
{
|
||||
if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp])
|
||||
{
|
||||
timestampCounterSet = counterSet;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (timestampCounterSet == nil)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform.");
|
||||
}
|
||||
|
||||
MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init];
|
||||
sampleDescriptor.counterSet = timestampCounterSet;
|
||||
sampleDescriptor.sampleCount = MaxQueries;
|
||||
sampleDescriptor.storageMode = MTLStorageModeShared;
|
||||
sampleDescriptor.label = @"TracyMetalTimestampPool";
|
||||
|
||||
NSError* error = nil;
|
||||
id<MTLCounterSampleBuffer> counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error];
|
||||
if (error != nil)
|
||||
{
|
||||
//NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason);
|
||||
TracyMetalPanic({assert(false);} return nil,
|
||||
"ERROR: unable to create sample buffer for timestamp counters : %s | %s",
|
||||
[error.localizedDescription cString], [error.localizedFailureReason cString]);
|
||||
}
|
||||
|
||||
return counterSampleBuffer;
|
||||
}
|
||||
|
||||
uint8_t m_contextId = 255;
|
||||
|
||||
id<MTLDevice> m_device = nil;
|
||||
id<MTLCounterSampleBuffer> m_counterSampleBuffers [2] = {};
|
||||
|
||||
using atomic_counter = std::atomic<uintptr_t>;
|
||||
static_assert(atomic_counter::is_always_lock_free);
|
||||
atomic_counter m_queryCounter = 0;
|
||||
|
||||
atomic_counter m_previousCheckpoint = 0;
|
||||
MTLTimestamp m_mostRecentTimestamp = 0;
|
||||
|
||||
std::vector<std::chrono::high_resolution_clock::time_point> m_timestampRequestTime;
|
||||
|
||||
std::mutex m_collectionMutex;
|
||||
};
|
||||
|
||||
class MetalZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
/* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
m_cmdEncoder = cmdEncoder;
|
||||
|
||||
auto& query = m_query = ctx->NextQueryId();
|
||||
|
||||
[m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES];
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
|
||||
}
|
||||
*/
|
||||
|
||||
tracy_force_inline ~MetalZoneScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
|
||||
}
|
||||
|
||||
TracyMetalDebugZoneScopeWireTap;
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
MetalCtx* m_ctx;
|
||||
|
||||
/* TODO: declare it for "command-level" profiling
|
||||
id<MTLComputeCommandEncoder> m_cmdEncoder;
|
||||
*/
|
||||
|
||||
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
|
||||
}
|
||||
|
||||
static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
|
||||
}
|
||||
|
||||
MetalCtx::Query m_query = {};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
using TracyMetalCtx = tracy::MetalCtx;
|
||||
|
||||
#define TracyMetalContext(device) tracy::MetalCtx::Create(device)
|
||||
#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx)
|
||||
#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size)
|
||||
|
||||
#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true )
|
||||
#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true )
|
||||
#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
|
||||
#define TracyMetalCollect( ctx ) ctx->Collect();
|
||||
|
||||
|
||||
|
||||
#undef TracyMetalDebug_ZoneScopeWireTap
|
||||
#undef TracyMetalDebug_0b00010
|
||||
#undef TracyMetalDebug_0b10000
|
||||
#undef TracyMetalDebugMasked
|
||||
#undef TRACY_METAL_DEBUG_MASK
|
||||
#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
#undef TracyMetalPanic
|
||||
#undef TRACY_METAL_VA_ARGS
|
||||
|
||||
#endif
|
||||
|
||||
#endif//__TRACYMETAL_HMM__
|
||||
414
tracy/TracyOpenCL.hpp
Normal file
414
tracy/TracyOpenCL.hpp
Normal file
@@ -0,0 +1,414 @@
|
||||
#ifndef __TRACYOPENCL_HPP__
|
||||
#define __TRACYOPENCL_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyCLContext(c, x) nullptr
|
||||
#define TracyCLDestroy(c)
|
||||
#define TracyCLContextName(c, x, y)
|
||||
|
||||
#define TracyCLNamedZone(c, x, y, z)
|
||||
#define TracyCLNamedZoneC(c, x, y, z, w)
|
||||
#define TracyCLZone(c, x)
|
||||
#define TracyCLZoneC(c, x, y)
|
||||
#define TracyCLZoneTransient(c,x,y,z)
|
||||
|
||||
#define TracyCLNamedZoneS(c, x, y, z, w)
|
||||
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
|
||||
#define TracyCLZoneS(c, x, y)
|
||||
#define TracyCLZoneCS(c, x, y, z)
|
||||
#define TracyCLZoneTransientS(c,x,y,z,w)
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(x, e)
|
||||
#define TracyCLZoneSetEvent(e)
|
||||
|
||||
#define TracyCLCollect(c)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class OpenCLCtxScope {};
|
||||
}
|
||||
|
||||
using TracyCLCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#define TRACY_CL_TO_STRING_INDIRECT(T) #T
|
||||
#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T)
|
||||
#define TRACY_CL_ASSERT(p) if(!(p)) { \
|
||||
TracyMessageL( "TRACY_CL_ASSERT failed on " TracyFile ":" TRACY_CL_TO_STRING(TracyLine) ); \
|
||||
assert(false && "TRACY_CL_ASSERT failed"); \
|
||||
}
|
||||
#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \
|
||||
std::ostringstream oss; \
|
||||
oss << "TRACY_CL_CHECK_ERROR failed on " << TracyFile << ":" << TracyLine \
|
||||
<< ": error code " << err; \
|
||||
auto msg = oss.str(); \
|
||||
TracyMessage(msg.data(), msg.size()); \
|
||||
assert(false && "TRACY_CL_CHECK_ERROR failed"); \
|
||||
}
|
||||
|
||||
namespace tracy {
|
||||
|
||||
enum class EventPhase : uint8_t
|
||||
{
|
||||
Begin,
|
||||
End
|
||||
};
|
||||
|
||||
struct EventInfo
|
||||
{
|
||||
cl_event event;
|
||||
EventPhase phase;
|
||||
};
|
||||
|
||||
class OpenCLCtx
|
||||
{
|
||||
public:
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
OpenCLCtx(cl_context context, cl_device_id device)
|
||||
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
, m_head(0)
|
||||
, m_tail(0)
|
||||
{
|
||||
int64_t tcpu, tgpu;
|
||||
TRACY_CL_ASSERT(m_contextId != 255);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
TRACY_CL_CHECK_ERROR(err)
|
||||
uint32_t dummyValue = 42;
|
||||
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
|
||||
TRACY_CL_CHECK_ERROR(err)
|
||||
cl_event writeBufferEvent;
|
||||
TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent));
|
||||
TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent));
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
|
||||
cl_int eventStatus;
|
||||
TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr));
|
||||
TRACY_CL_ASSERT(eventStatus == CL_COMPLETE);
|
||||
TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue));
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, tcpu);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, tgpu);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
if (m_tail == m_head) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount)
|
||||
{
|
||||
EventInfo eventInfo = GetQuery(m_tail);
|
||||
cl_int eventStatus;
|
||||
cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping...";
|
||||
auto msg = oss.str();
|
||||
TracyMessage(msg.data(), msg.size());
|
||||
if (eventInfo.event == nullptr) {
|
||||
TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!");
|
||||
}
|
||||
assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent");
|
||||
continue;
|
||||
}
|
||||
if (eventStatus != CL_COMPLETE) return;
|
||||
|
||||
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
|
||||
? CL_PROFILING_COMMAND_START
|
||||
: CL_PROFILING_COMMAND_END;
|
||||
|
||||
cl_ulong eventTimeStamp = 0;
|
||||
err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
|
||||
if (err == CL_PROFILING_INFO_NOT_AVAILABLE)
|
||||
{
|
||||
TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!");
|
||||
assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag");
|
||||
}
|
||||
else
|
||||
TRACY_CL_CHECK_ERROR(err);
|
||||
|
||||
TRACY_CL_ASSERT(eventTimeStamp != 0);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
if (eventInfo.phase == EventPhase::End)
|
||||
{
|
||||
// Done with the event, so release it
|
||||
TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = (m_head + 1) % QueryCount;
|
||||
TRACY_CL_ASSERT(m_head != m_tail);
|
||||
m_query[id] = eventInfo;
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline EventInfo& GetQuery(unsigned int id)
|
||||
{
|
||||
TRACY_CL_ASSERT(id < QueryCount);
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
unsigned int m_contextId;
|
||||
|
||||
EventInfo m_query[QueryCount];
|
||||
unsigned int m_head; // index at which a new event should be inserted
|
||||
unsigned int m_tail; // oldest event
|
||||
|
||||
};
|
||||
|
||||
class OpenCLCtxScope {
|
||||
public:
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
GetProfiler().SendCallstack(depth);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void SetEvent(cl_event event)
|
||||
{
|
||||
if (!m_active) return;
|
||||
m_event = event;
|
||||
TRACY_CL_CHECK_ERROR(clRetainEvent(m_event));
|
||||
m_ctx->GetQuery(m_beginQueryId).event = m_event;
|
||||
}
|
||||
|
||||
tracy_force_inline ~OpenCLCtxScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
const bool m_active;
|
||||
OpenCLCtx* m_ctx;
|
||||
cl_event m_event;
|
||||
unsigned int m_beginQueryId;
|
||||
};
|
||||
|
||||
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
|
||||
{
|
||||
auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx));
|
||||
new (ctx) OpenCLCtx(context, device);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyCLContext(OpenCLCtx* ctx)
|
||||
{
|
||||
ctx->~OpenCLCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
} // namespace tracy
|
||||
|
||||
using TracyCLCtx = tracy::OpenCLCtx*;
|
||||
|
||||
#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device);
|
||||
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
|
||||
#define TracyCLContextName(ctx, name, size) ctx->Name(name, size);
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active);
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active);
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
|
||||
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active);
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active);
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
|
||||
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active );
|
||||
#else
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
|
||||
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active )
|
||||
#endif
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
|
||||
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
|
||||
|
||||
#define TracyCLCollect(ctx) ctx->Collect()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
325
tracy/TracyOpenGL.hpp
Normal file
325
tracy/TracyOpenGL.hpp
Normal file
@@ -0,0 +1,325 @@
|
||||
#ifndef __TRACYOPENGL_HPP__
|
||||
#define __TRACYOPENGL_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE || defined __APPLE__
|
||||
|
||||
#define TracyGpuContext
|
||||
#define TracyGpuContextName(x,y)
|
||||
#define TracyGpuNamedZone(x,y,z)
|
||||
#define TracyGpuNamedZoneC(x,y,z,w)
|
||||
#define TracyGpuZone(x)
|
||||
#define TracyGpuZoneC(x,y)
|
||||
#define TracyGpuZoneTransient(x,y,z)
|
||||
#define TracyGpuCollect
|
||||
|
||||
#define TracyGpuNamedZoneS(x,y,z,w)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w,a)
|
||||
#define TracyGpuZoneS(x,y)
|
||||
#define TracyGpuZoneCS(x,y,z)
|
||||
#define TracyGpuZoneTransientS(x,y,z,w)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
struct SourceLocationData;
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
GpuCtxScope( const SourceLocationData*, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int32_t, bool ) {}
|
||||
};
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT
|
||||
# define GL_TIMESTAMP GL_TIMESTAMP_EXT
|
||||
# define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
|
||||
# define glGetQueryObjectiv glGetQueryObjectivEXT
|
||||
# define glGetQueryObjectui64v glGetQueryObjectui64vEXT
|
||||
# define glQueryCounter glQueryCounterEXT
|
||||
#endif
|
||||
|
||||
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active );
|
||||
#endif
|
||||
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active );
|
||||
#else
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class GpuCtx
|
||||
{
|
||||
friend class GpuCtxScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
GpuCtx()
|
||||
: m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
glGenQueries( QueryCount, m_query );
|
||||
|
||||
int64_t tgpu;
|
||||
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
GLint bits;
|
||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||
|
||||
const float period = 1.f;
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
while( m_tail != m_head )
|
||||
{
|
||||
GLint available;
|
||||
glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
|
||||
if( !available ) return;
|
||||
|
||||
uint64_t time;
|
||||
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
|
||||
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
TracyLfqCommit;
|
||||
|
||||
m_tail = ( m_tail + 1 ) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % QueryCount;
|
||||
assert( m_head != m_tail );
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
|
||||
{
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
unsigned int m_query[QueryCount];
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
#else
|
||||
GetProfiler().SendCallstack( depth );
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
#endif
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
#else
|
||||
GetProfiler().SendCallstack( depth );
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
#endif
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ~GpuCtxScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneEnd );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
747
tracy/TracyVulkan.hpp
Normal file
747
tracy/TracyVulkan.hpp
Normal file
@@ -0,0 +1,747 @@
|
||||
#ifndef __TRACYVULKAN_HPP__
|
||||
#define __TRACYVULKAN_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyVkContext(x,y,z,w) nullptr
|
||||
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
|
||||
#if defined VK_EXT_host_query_reset
|
||||
#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr
|
||||
#endif
|
||||
#define TracyVkDestroy(x)
|
||||
#define TracyVkContextName(c,x,y)
|
||||
#define TracyVkNamedZone(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w,a)
|
||||
#define TracyVkZone(c,x,y)
|
||||
#define TracyVkZoneC(c,x,y,z)
|
||||
#define TracyVkZoneTransient(c,x,y,z,w)
|
||||
#define TracyVkCollect(c,x)
|
||||
#define TracyVkCollectHost(c)
|
||||
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
|
||||
#define TracyVkZoneS(c,x,y,z)
|
||||
#define TracyVkZoneCS(c,x,y,z,w)
|
||||
#define TracyVkZoneTransientS(c,x,y,z,w,a)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class VkCtxScope {};
|
||||
}
|
||||
|
||||
using TracyVkCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#if !defined VK_NULL_HANDLE
|
||||
# error "You must include Vulkan headers before including TracyVulkan.hpp"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define LoadVkDeviceCoreSymbols(Operation) \
|
||||
Operation(vkBeginCommandBuffer) \
|
||||
Operation(vkCmdResetQueryPool) \
|
||||
Operation(vkCmdWriteTimestamp) \
|
||||
Operation(vkCreateQueryPool) \
|
||||
Operation(vkDestroyQueryPool) \
|
||||
Operation(vkEndCommandBuffer) \
|
||||
Operation(vkGetQueryPoolResults) \
|
||||
Operation(vkQueueSubmit) \
|
||||
Operation(vkQueueWaitIdle) \
|
||||
Operation(vkResetQueryPool)
|
||||
|
||||
#define LoadVkDeviceExtensionSymbols(Operation) \
|
||||
Operation(vkGetCalibratedTimestampsEXT)
|
||||
|
||||
#define LoadVkInstanceExtensionSymbols(Operation) \
|
||||
Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
|
||||
|
||||
#define LoadVkInstanceCoreSymbols(Operation) \
|
||||
Operation(vkGetPhysicalDeviceProperties)
|
||||
|
||||
struct VkSymbolTable
|
||||
{
|
||||
#define MAKE_PFN(name) PFN_##name name;
|
||||
LoadVkDeviceCoreSymbols(MAKE_PFN)
|
||||
LoadVkDeviceExtensionSymbols(MAKE_PFN)
|
||||
LoadVkInstanceExtensionSymbols(MAKE_PFN)
|
||||
LoadVkInstanceCoreSymbols(MAKE_PFN)
|
||||
#undef MAKE_PFN
|
||||
};
|
||||
|
||||
#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature
|
||||
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature
|
||||
#else
|
||||
#define VK_FUNCTION_WRAPPER(callSignature) callSignature
|
||||
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature
|
||||
#endif
|
||||
|
||||
class VkCtx
|
||||
{
|
||||
friend class VkCtxScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated )
|
||||
#else
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT)
|
||||
#endif
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
#if !defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
|
||||
if ( calibrated )
|
||||
{
|
||||
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
|
||||
}
|
||||
|
||||
CreateQueryPool();
|
||||
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
VkSubmitInfo submitInfo = {};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &cmdbuf;
|
||||
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) );
|
||||
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
FindCalibratedTimestampDeviation();
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
WriteInitialItem( physdev, tcpu, tgpu );
|
||||
|
||||
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
|
||||
}
|
||||
|
||||
#if defined VK_EXT_host_query_reset
|
||||
/**
|
||||
* This alternative constructor does not use command buffers and instead uses functionality from
|
||||
* VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires
|
||||
* the physical device to have another time domain apart from DEVICE to be calibrateable.
|
||||
*/
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
|
||||
#else
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
#if !defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
{
|
||||
assert( m_context != 255);
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
|
||||
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
|
||||
#endif
|
||||
|
||||
assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr );
|
||||
assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr );
|
||||
assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr );
|
||||
|
||||
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
|
||||
|
||||
// We require a host time domain to be available to properly calibrate.
|
||||
FindCalibratedTimestampDeviation();
|
||||
int64_t tgpu;
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
CreateQueryPool();
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) );
|
||||
|
||||
WriteInitialItem( physdev, tcpu, tgpu );
|
||||
|
||||
// We need the buffer to be twice as large for availability values
|
||||
size_t resSize = sizeof( int64_t ) * m_queryCount * 2;
|
||||
m_res = (int64_t*)tracy_malloc( resSize );
|
||||
}
|
||||
#endif
|
||||
|
||||
~VkCtx()
|
||||
{
|
||||
tracy_free( m_res );
|
||||
VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) );
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect( VkCommandBuffer cmdbuf )
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
const uint64_t head = m_head.load(std::memory_order_relaxed);
|
||||
if( m_tail == head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
cmdbuf ?
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ) :
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, 0, m_queryCount ) );
|
||||
m_tail = head;
|
||||
m_oldCnt = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
assert( head > m_tail );
|
||||
|
||||
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
|
||||
|
||||
unsigned int cnt;
|
||||
if( m_oldCnt != 0 )
|
||||
{
|
||||
cnt = m_oldCnt;
|
||||
m_oldCnt = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cnt = (unsigned int)( head - m_tail );
|
||||
assert( cnt <= m_queryCount );
|
||||
if( wrappedTail + cnt > m_queryCount )
|
||||
{
|
||||
cnt = m_queryCount - wrappedTail;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) );
|
||||
|
||||
for( unsigned int idx=0; idx<cnt; idx++ )
|
||||
{
|
||||
int64_t avail = m_res[idx * 2 + 1];
|
||||
if( avail == 0 )
|
||||
{
|
||||
m_oldCnt = cnt - idx;
|
||||
cnt = idx;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] );
|
||||
MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
int64_t tgpu, tcpu;
|
||||
Calibrate( m_device, tcpu, tgpu );
|
||||
const auto refCpu = Profiler::GetTime();
|
||||
const auto delta = tcpu - m_prevCalibration;
|
||||
if( delta > 0 )
|
||||
{
|
||||
m_prevCalibration = tcpu;
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf ?
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ) :
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, wrappedTail, cnt ) );
|
||||
|
||||
m_tail += cnt;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed);
|
||||
return id % m_queryCount;
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
tracy_force_inline VkQueryPool GetQueryPool() const
|
||||
{
|
||||
return m_query;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
|
||||
{
|
||||
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation;
|
||||
do
|
||||
{
|
||||
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
|
||||
}
|
||||
while( deviation > m_deviation );
|
||||
|
||||
#if defined _WIN32
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1] * m_qpcToNs;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1];
|
||||
#else
|
||||
assert( false );
|
||||
#endif
|
||||
}
|
||||
|
||||
tracy_force_inline void CreateQueryPool()
|
||||
{
|
||||
VkQueryPoolCreateInfo poolInfo = {};
|
||||
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) )
|
||||
{
|
||||
m_queryCount /= 2;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT )
|
||||
{
|
||||
uint32_t num;
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr );
|
||||
if(num > 4) num = 4;
|
||||
VkTimeDomainEXT data[4];
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data );
|
||||
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
|
||||
#if defined _WIN32
|
||||
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
|
||||
#endif
|
||||
for( uint32_t i=0; i<num; i++ ) {
|
||||
if(data[i] == supportedDomain) {
|
||||
m_timeDomain = data[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void FindCalibratedTimestampDeviation()
|
||||
{
|
||||
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
|
||||
constexpr size_t NumProbes = 32;
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( size_t i=0; i<NumProbes; i++ ) {
|
||||
m_vkGetCalibratedTimestampsEXT( m_device, 2, spec, ts, deviation + i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( size_t i=1; i<NumProbes; i++ ) {
|
||||
if ( minDeviation > deviation[i] ) {
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
#if defined _WIN32
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
#endif
|
||||
}
|
||||
|
||||
tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu )
|
||||
{
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
|
||||
VkPhysicalDeviceProperties prop;
|
||||
VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) );
|
||||
const float period = prop.limits.timestampPeriod;
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
|
||||
{
|
||||
#define VK_GET_DEVICE_SYMBOL( name ) \
|
||||
(PFN_##name)deviceProcAddr( m_device, #name );
|
||||
#define VK_LOAD_DEVICE_SYMBOL( name ) \
|
||||
m_symbols.name = VK_GET_DEVICE_SYMBOL( name );
|
||||
#define VK_GET_INSTANCE_SYMBOL( name ) \
|
||||
(PFN_##name)instanceProcAddr( instance, #name );
|
||||
#define VK_LOAD_INSTANCE_SYMBOL( name ) \
|
||||
m_symbols.name = VK_GET_INSTANCE_SYMBOL( name );
|
||||
|
||||
LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL )
|
||||
LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL )
|
||||
LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL )
|
||||
LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL )
|
||||
#undef VK_GET_DEVICE_SYMBOL
|
||||
#undef VK_LOAD_DEVICE_SYMBOL
|
||||
#undef VK_GET_INSTANCE_SYMBOL
|
||||
#undef VK_LOAD_INSTANCE_SYMBOL
|
||||
}
|
||||
#endif
|
||||
|
||||
VkDevice m_device;
|
||||
VkQueryPool m_query;
|
||||
VkTimeDomainEXT m_timeDomain;
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkSymbolTable m_symbols;
|
||||
#endif
|
||||
uint64_t m_deviation;
|
||||
#ifdef _WIN32
|
||||
int64_t m_qpcToNs;
|
||||
#endif
|
||||
int64_t m_prevCalibration;
|
||||
uint8_t m_context;
|
||||
|
||||
std::atomic<uint64_t> m_head;
|
||||
uint64_t m_tail;
|
||||
unsigned int m_oldCnt;
|
||||
unsigned int m_queryCount;
|
||||
|
||||
int64_t* m_res;
|
||||
|
||||
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
|
||||
};
|
||||
|
||||
class VkCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
QueueItem *item;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
}
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
QueueItem *item;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
}
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~VkCtxScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
VkCommandBuffer m_cmdbuf;
|
||||
VkCtx* m_ctx;
|
||||
};
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false )
|
||||
#else
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
#endif
|
||||
{
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated );
|
||||
#else
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#endif
|
||||
return ctx;
|
||||
}
|
||||
|
||||
#if defined VK_EXT_host_query_reset
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr )
|
||||
#else
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
#endif
|
||||
{
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr );
|
||||
#else
|
||||
new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct );
|
||||
#endif
|
||||
return ctx;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void DestroyVkContext( VkCtx* ctx )
|
||||
{
|
||||
ctx->~VkCtx();
|
||||
tracy_free( ctx );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
using TracyVkCtx = tracy::VkCtx*;
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr );
|
||||
#else
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
|
||||
#endif
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true );
|
||||
#else
|
||||
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#endif
|
||||
#if defined VK_EXT_host_query_reset
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr );
|
||||
#else
|
||||
#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct );
|
||||
#endif
|
||||
#endif
|
||||
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
|
||||
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
|
||||
#else
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active );
|
||||
#endif
|
||||
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
|
||||
#define TracyVkCollectHost( ctx ) ctx->Collect( VK_NULL_HANDLE );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active );
|
||||
#else
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user