#ifndef __TRACYMETAL_HMM__ #define __TRACYMETAL_HMM__ /* This file implements a Metal API back-end for Tracy (it has only been tested on Apple Silicon devices, but it should also work on Intel-based Macs and older iOS devices). The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan, Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where a command encoder is created. This is because not all hardware supports timestamps at command granularity, and can only provide timestamps around an entire command encoder. This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone() will be added to support the habitual command-level granularity of Tracy GPU back-ends. Metal also imposes a few restrictions that make the process of requesting and collecting queries more complicated in Tracy: a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes) b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes, and there's no way to reset them back to zero after timestamps get resolved; the only way to clear the timestamps is by allocating a new timestamp query buffer c) if a command encoder records no commands and its corresponding command buffer ends up committed to the command queue, Metal will "optimize-away" the encoder along with any timestamp queries associated with it (the timestamp will remain as zero and will never get resolved) Because of the limitations above, two timestamp buffers are managed internally. Once one of the buffers fills up with requests, the second buffer can start serving new requests. Once all requests in a buffer get resolved and collected, the entire buffer is discarded and a new one allocated for future requests. (Proper cycling through a ring buffer would require bookkeeping and completion handlers to collect only the known complete queries.) In the current implementation, there is potential for a race condition when the buffer is discarded and reallocated. In practice, the race condition will never materialize so long as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low. Finally, there's a timeout mechanism during timestamp collection to detect "empty" command encoders and ensure progress. */ #ifndef TRACY_ENABLE #define TracyMetalContext(device) nullptr #define TracyMetalDestroy(ctx) #define TracyMetalContextName(ctx, name, size) #define TracyMetalZone(ctx, encoderDesc, name) #define TracyMetalZoneC(ctx, encoderDesc, name, color) #define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active) #define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active) #define TracyMetalCollect(ctx) namespace tracy { class MetalZoneScope {}; } using TracyMetalCtx = void; #else #if not __has_feature(objc_arc) #error TracyMetal requires ARC to be enabled. #endif #include #include #include #include "Tracy.hpp" #include "../client/TracyProfiler.hpp" #include "../client/TracyCallstack.hpp" #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" // ok to import if in obj-c code #import #define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ #define TracyMetalPanic(ret, msg, ...) do { \ char buffer [1024]; \ snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ fprintf(stderr, "%s\n", buffer); \ ret; \ } while(false); #ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT #define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f #endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT #ifndef TRACY_METAL_DEBUG_MASK #define TRACY_METAL_DEBUG_MASK (0) #endif//TRACY_METAL_DEBUG_MASK #if TRACY_METAL_DEBUG_MASK #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } #else #define TracyMetalDebugMasked(mask, ...) #endif #if TRACY_METAL_DEBUG_MASK & (1 << 1) #define TracyMetalDebug_0b00010(...) __VA_ARGS__; #else #define TracyMetalDebug_0b00010(...) #endif #if TRACY_METAL_DEBUG_MASK & (1 << 4) #define TracyMetalDebug_0b10000(...) __VA_ARGS__; #else #define TracyMetalDebug_0b10000(...) #endif #ifndef TracyMetalDebugZoneScopeWireTap #define TracyMetalDebugZoneScopeWireTap #endif//TracyMetalDebugZoneScopeWireTap namespace tracy { class MetalCtx { friend class MetalZoneScope; enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_... public: static MetalCtx* Create(id device) { ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4); auto ctx = static_cast(tracy_malloc(sizeof(MetalCtx))); new (ctx) MetalCtx(device); if (ctx->m_contextId == 255) { TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context."); Destroy(ctx); } return ctx; } static void Destroy(MetalCtx* ctx) { ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4); ctx->~MetalCtx(); tracy_free(ctx); } void Name( const char* name, uint16_t len ) { auto ptr = (char*)tracy_malloc( len ); memcpy( ptr, name, len ); auto* item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuContextName ); MemWrite( &item->gpuContextNameFat.context, m_contextId ); MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); MemWrite( &item->gpuContextNameFat.size, len ); SubmitQueueItem(item); } bool Collect() { ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4); #ifdef TRACY_ON_DEMAND if (!GetProfiler().IsConnected()) { return true; } #endif // Only one thread is allowed to collect timestamps at any given time // but there's no need to block contending threads if (!m_collectionMutex.try_lock()) { return true; } std::unique_lock lock (m_collectionMutex, std::adopt_lock); uintptr_t begin = m_previousCheckpoint.load(); uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?; TracyMetalDebugMasked(1<<3, ZoneValue(begin)); TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); uint32_t count = RingCount(begin, latestCheckpoint); if (count == 0) // no pending timestamp queries { //uintptr_t nextCheckpoint = m_queryCounter.load(); //if (nextCheckpoint != latestCheckpoint) //{ // // TODO: signal event / fence now? //} return true; } // resolve up until the ring buffer boundary and let a subsequenty call // to Collect handle the wrap-around bool reallocateBuffer = false; if (RingIndex(begin) + count >= RingSize()) { count = RingSize() - RingIndex(begin); reallocateBuffer = true; } TracyMetalDebugMasked(1<<3, ZoneValue(count)); auto buffer_idx = (begin / MaxQueries) % 2; auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; if (count >= RingSize()) { TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); } TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); NSRange range = NSMakeRange(RingIndex(begin), count); NSData* data = [counterSampleBuffer resolveCounterRange:range]; NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp); MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes); if (timestamps == nil) { TracyMetalPanic(return false, "Collect: unable to resolve timestamps."); } if (numResolvedTimestamps != count) { TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count); } int resolved = 0; for (auto i = 0; i < numResolvedTimestamps; i += 2) { TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); MTLTimestamp t_start = timestamps[i+0].timestamp; MTLTimestamp t_end = timestamps[i+1].timestamp; uint32_t k = RingIndex(begin + i); TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) { TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); break; } // Metal will initialize timestamp buffer with zeroes; encountering a zero-value // timestamp means that the timestamp has not been written and resolved yet if ((t_start == 0) || (t_end == 0)) { auto checkTime = std::chrono::high_resolution_clock::now(); auto requestTime = m_timestampRequestTime[k]; auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; if (ms_in_flight < timeout_ms) break; TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); t_start = m_mostRecentTimestamp + 5; t_end = t_start + 5; } TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); { auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); MemWrite(&item->gpuTime.gpuTime, static_cast(t_start)); MemWrite(&item->gpuTime.queryId, static_cast(k)); MemWrite(&item->gpuTime.context, m_contextId); Profiler::QueueSerialFinish(); } { auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); MemWrite(&item->gpuTime.gpuTime, static_cast(t_end)); MemWrite(&item->gpuTime.queryId, static_cast(k+1)); MemWrite(&item->gpuTime.context, m_contextId); Profiler::QueueSerialFinish(); } m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); resolved += 2; } TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); m_previousCheckpoint += resolved; // Check whether the timestamp buffer has been fully resolved/collected: // WARN: there's technically a race condition here: NextQuery() may reference the // buffer that is being released instead of the new one. In practice, this should // never happen so long as Collect is called frequently enough to prevent pending // timestamp query requests from piling up too quickly. if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0) { m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries); } //RecalibrateClocks(); // to account for drift return true; } private: MetalCtx(id device) : m_device(device) { TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); if (m_device == nil) { TracyMetalPanic({assert(false);} return, "device is nil."); } if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) { TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); } if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) { TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); } if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) { TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); } if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) { TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); } if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) { TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); } m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); m_timestampRequestTime.resize(MaxQueries); MTLTimestamp cpuTimestamp = 0; MTLTimestamp gpuTimestamp = 0; [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; m_mostRecentTimestamp = gpuTimestamp; TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); cpuTimestamp = Profiler::GetTime(); TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); float period = 1.0f; m_contextId = GetGpuCtxCounter().fetch_add(1); auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuNewContext); MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? MemWrite(&item->gpuNewContext.period, period); MemWrite(&item->gpuNewContext.context, m_contextId); //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); SubmitQueueItem(item); } ~MetalCtx() { // collect the last remnants of Metal GPU activity... // TODO: add a timeout to this loop? while (m_previousCheckpoint.load() != m_queryCounter.load()) Collect(); } tracy_force_inline void SubmitQueueItem(QueueItem* item) { #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem(*item); #endif Profiler::QueueSerialFinish(); } tracy_force_inline uint32_t RingIndex(uintptr_t index) { index %= MaxQueries; return static_cast(index); } tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) { // wrap-around safe: all unsigned uintptr_t count = end - begin; return static_cast(count); } tracy_force_inline uint32_t RingSize() const { return MaxQueries; } struct Query { id buffer; uint32_t idx; }; tracy_force_inline Query NextQuery() { TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); auto id = m_queryCounter.fetch_add(2); TracyMetalDebug_0b00010( ZoneValue(id) ); auto count = RingCount(m_previousCheckpoint, id); if (count >= MaxQueries) { // TODO: return a proper (hidden) "sentinel" query Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; TracyMetalPanic( return sentinel, "NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)", m_previousCheckpoint.load(), id, count ); } uint32_t buffer_idx = (id / MaxQueries) % 2; TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); auto buffer = m_counterSampleBuffers[buffer_idx]; if (buffer == nil) TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id); uint32_t idx = RingIndex(id); TracyMetalDebug_0b00010( ZoneValue(idx) ); TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); return Query{ buffer, idx }; } tracy_force_inline uint8_t GetContextId() const { return m_contextId; } static id NewTimestampSampleBuffer(id device, size_t count) { ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); id timestampCounterSet = nil; for (id counterSet in device.counterSets) { if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp]) { timestampCounterSet = counterSet; break; } } if (timestampCounterSet == nil) { TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); } MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; sampleDescriptor.counterSet = timestampCounterSet; sampleDescriptor.sampleCount = MaxQueries; sampleDescriptor.storageMode = MTLStorageModeShared; sampleDescriptor.label = @"TracyMetalTimestampPool"; NSError* error = nil; id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; if (error != nil) { //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); TracyMetalPanic({assert(false);} return nil, "ERROR: unable to create sample buffer for timestamp counters : %s | %s", [error.localizedDescription cString], [error.localizedFailureReason cString]); } return counterSampleBuffer; } uint8_t m_contextId = 255; id m_device = nil; id m_counterSampleBuffers [2] = {}; using atomic_counter = std::atomic; static_assert(atomic_counter::is_always_lock_free); atomic_counter m_queryCounter = 0; atomic_counter m_previousCheckpoint = 0; MTLTimestamp m_mostRecentTimestamp = 0; std::vector m_timestampRequestTime; std::mutex m_collectionMutex; }; class MetalZoneScope { public: tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else : m_active( is_active ) #endif { if ( !m_active ) return; if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else : m_active( is_active ) #endif { if ( !m_active ) return; if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else : m_active( is_active ) #endif { if ( !m_active ) return; if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample; desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } /* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it tracy_force_inline MetalZoneScope( MetalCtx* ctx, id cmdEncoder, const SourceLocationData* srcloc, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else : m_active( is_active ) #endif { if( !m_active ) return; m_ctx = ctx; m_cmdEncoder = cmdEncoder; auto& query = m_query = ctx->NextQueryId(); [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; SubmitZoneBeginGpu(ctx, query.idx, srcloc); } */ tracy_force_inline ~MetalZoneScope() { if( !m_active ) return; SubmitZoneEndGpu(m_ctx, m_query.idx + 1); } TracyMetalDebugZoneScopeWireTap; private: const bool m_active; MetalCtx* m_ctx; /* TODO: declare it for "command-level" profiling id m_cmdEncoder; */ static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc) { auto* item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) { auto* item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } MetalCtx::Query m_query = {}; }; } using TracyMetalCtx = tracy::MetalCtx; #define TracyMetalContext(device) tracy::MetalCtx::Create(device) #define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) #define TracyMetalContextName(ctx, name, size) ctx->Name(name, size) #define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true ) #define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true ) #define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); #define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); #define TracyMetalCollect( ctx ) ctx->Collect(); #undef TracyMetalDebug_ZoneScopeWireTap #undef TracyMetalDebug_0b00010 #undef TracyMetalDebug_0b10000 #undef TracyMetalDebugMasked #undef TRACY_METAL_DEBUG_MASK #undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT #undef TracyMetalPanic #undef TRACY_METAL_VA_ARGS #endif #endif//__TRACYMETAL_HMM__