一言以蔽之
UE4 的 Profiler 基于 Instrumentation(插桩)方案:通过 STAT 相关宏(Macro)进行埋点插桩,并调用各平台的高分辨率时间戳接口来测量时间间隔。
相关工具
Session Frontend
Stat
所用方案
Instrumentation
详见《性能分析工具的实现原理探究》
自定义埋点
步骤
定义
// Declare a stat group with description "MarsTest", id STATGROUP_MarsTest and category STATCAT_Advanced.
DECLARE_STATS_GROUP(TEXT("MarsTest"), STATGROUP_MarsTest, STATCAT_Advanced)
// Declare a cycle stat named "MarsTest_Character_Tick" with id STAT_MarsTest_Character_Tick inside that group.
DECLARE_CYCLE_STAT(TEXT("MarsTest_Character_Tick"), STAT_MarsTest_Character_Tick, STATGROUP_MarsTest)
使用
// Put this at the start of the scope to measure: it declares a scoped counter object for the
// stat, so timing starts here and stops when the enclosing scope exits.
SCOPE_CYCLE_COUNTER(STAT_MarsTest_Character_Tick)
效果图
Stat MarsTest
Frontend - Profiler
核心原理
埋点
DECLARE_STATS_GROUP(TEXT("MarsTest"), STATGROUP_MarsTest, STATCAT_Advanced)
// Convenience wrapper around DECLARE_STAT_GROUP: forwards the description, id and category and
// fixes the remaining arguments to InDefaultEnable = true, InCompileTimeEnable = true,
// InSortByName = false.
# define DECLARE_STATS_GROUP(GroupDesc, GroupId, GroupCat) \
DECLARE_STAT_GROUP(GroupDesc, GroupId, GroupCat, true, true, false);
// Defines a struct named FStatGroup_<StatName> that describes one stat group at compile time.
//   Description          - TCHAR description string, returned by GetDescription().
//   StatName             - group id token; pasted into the struct name and stringified by GetGroupName().
//   StatCategory         - category token; stringified by GetGroupCategory().
//   InDefaultEnable      - stored as enum value DefaultEnable, returned by IsDefaultEnabled().
//   InCompileTimeEnable  - stored as enum value CompileTimeEnable, returned by IsCompileTimeEnable().
//   InSortByName         - stored as enum value SortByName, returned by GetSortByName().
// The three flags live in an anonymous enum, so they are also usable as compile-time constants
// (e.g. as a template argument such as TGroup::CompileTimeEnable).
# define DECLARE_STAT_GROUP(Description, StatName, StatCategory, InDefaultEnable, InCompileTimeEnable, InSortByName) \
struct FStatGroup_##StatName\
{ \
enum \
{ \
DefaultEnable = InDefaultEnable, \
CompileTimeEnable = InCompileTimeEnable, \
SortByName = InSortByName \
}; \
static FORCEINLINE const char* GetGroupName() \
{ \
return #StatName; \
} \
static FORCEINLINE const char* GetGroupCategory() \
{ \
return #StatCategory; \
} \
static FORCEINLINE const TCHAR* GetDescription() \
{ \
return Description; \
} \
static FORCEINLINE bool IsDefaultEnabled() \
{ \
return (bool)DefaultEnable; \
} \
static FORCEINLINE bool IsCompileTimeEnable() \
{ \
return (bool)CompileTimeEnable; \
} \
static FORCEINLINE bool GetSortByName() \
{ \
return (bool)SortByName; \
} \
};
DECLARE_CYCLE_STAT(TEXT("MarsTest_Character_Tick"), STAT_MarsTest_Character_Tick, STATGROUP_MarsTest)
// Declares a cycle stat in two steps:
//   1. DECLARE_STAT(...) with data type EStatDataType::ST_int64 and memory region
//      FPlatformMemory::MCR_Invalid. The two `true` arguments are positional flags of
//      DECLARE_STAT, which is not shown in this excerpt -- NOTE(review): presumably
//      "clear every frame" and "is cycle stat"; confirm against the DECLARE_STAT definition.
//   2. `static DEFINE_STAT(StatId)`, which defines the static StatPtr_<StatId> object for the stat.
# define DECLARE_CYCLE_STAT(CounterName,StatId,GroupId) \
DECLARE_STAT(CounterName,StatId,GroupId,EStatDataType::ST_int64, true, true, FPlatformMemory::MCR_Invalid); \
static DEFINE_STAT(StatId)
// Defines the variable StatPtr_<Stat>, of type FThreadSafeStaticStat<FStat_<Stat>>, that backs the stat.
# define DEFINE_STAT(Stat) \
struct FThreadSafeStaticStat<FStat_##Stat> StatPtr_##Stat;
/**
 * Per-stat object created by DEFINE_STAT. It derives from FThreadSafeStaticStatInner, selecting
 * the variant that matches the group's CompileTimeEnable flag.
 */
template<class TStatData>
struct FThreadSafeStaticStat : public FThreadSafeStaticStatInner<TStatData, TStatData::TGroup::CompileTimeEnable>
{
/** Calls GetStatId() once at construction; the returned id is discarded, only the side effect matters. */
FThreadSafeStaticStat()
{
// This call registers the group if it is compile-time enabled.
// It fixes a bug that occurred when a stat group only had counters using the INC_/DEC_ macros:
// those macros are guarded so that they only run while stats collection is active, which
// prevented the stat group from ever being registered. As a result the group could not be
// activated unless another group was already active.
// Most groups are registered when an FScopeCycleCounter is declared, because GetStatId is
// called to produce its constructor argument.
FThreadSafeStaticStatInner<TStatData, TStatData::TGroup::CompileTimeEnable>::GetStatId();
}
};
/**
 * Resolves the stat id for TStatData on demand. The pointer held in HighPerformanceEnable is
 * used when it is already set; otherwise DoSetup() is called with the stat's and its group's
 * static properties to obtain it.
 */
template<class TStatData, bool TCompiledIn>
struct FThreadSafeStaticStatInner : public FThreadSafeStaticStatBase
{
	/** Returns the stat id, running DoSetup() when no pointer has been stored yet. */
	FORCEINLINE_STATS TStatId GetStatId() const
	{
		// Fast path: a previous call already stored the pointer.
		if (const TStatIdData* CachedStatData = HighPerformanceEnable.Load(EMemoryOrder::Relaxed))
		{
			return TStatId(CachedStatData);
		}

		// Slow path: hand the stat and group properties to DoSetup().
		using TGroupType = typename TStatData::TGroup;
		const TStatIdData* RegisteredStatData = DoSetup(
			TStatData::GetStatName(),
			TStatData::GetDescription(),
			TGroupType::GetGroupName(),
			TGroupType::GetGroupCategory(),
			TGroupType::GetDescription(),
			TGroupType::IsDefaultEnabled(),
			TStatData::IsClearEveryFrame(),
			TStatData::GetStatType(),
			TStatData::IsCycleStat(),
			TGroupType::GetSortByName(),
			TStatData::GetMemoryRegion());
		return TStatId(RegisteredStatData);
	}

	/** Returns the name carried by the stat id. */
	FORCEINLINE FName GetStatFName() const
	{
		const TStatId ResolvedId = GetStatId();
		return ResolvedId.GetName();
	}
};
/**
 * Sends the stat's metadata and resolves the TStatIdData pointer for it.
 *
 * Every parameter is forwarded to FStartupMessages::AddMetadata and/or
 * IStatGroupEnableManager::GetHighPerformanceEnableForStat. InGroupDesc goes only to the
 * former and bDefaultEnable only to the latter.
 *
 * @return the pointer obtained from the enable manager, which is also stored in HighPerformanceEnable.
 */
const TStatIdData* FThreadSafeStaticStatBase::DoSetup(const char* InStatName, const TCHAR* InStatDesc, const char* InGroupName, const char* InGroupCategory, const TCHAR* InGroupDesc, bool bDefaultEnable, bool bShouldClearEveryFrame, EStatDataType::Type InStatType, bool bCycleStat, bool bSortByName, FPlatformMemory::EMemoryCounterRegion InMemoryRegion) const
{
FName TempName(InStatName);
// Send the metadata through the startup message list rather than a normal stats message,
// because the stats thread might not be running yet.
FStartupMessages::Get().AddMetadata(TempName, InStatDesc, InGroupName, InGroupCategory, InGroupDesc, bShouldClearEveryFrame, InStatType, bCycleStat, bSortByName, InMemoryRegion);
// Ask the group-enable manager for this stat's id and keep its raw pointer.
TStatIdData const* LocalHighPerformanceEnable(IStatGroupEnableManager::Get().GetHighPerformanceEnableForStat(FName(InStatName), InGroupName, InGroupCategory, bDefaultEnable, bShouldClearEveryFrame, InStatType, InStatDesc, bCycleStat, bSortByName, InMemoryRegion).GetRawPointer());
// Store the pointer and fetch whatever was stored before.
TStatIdData const* OldHighPerformanceEnable = HighPerformanceEnable.Exchange(LocalHighPerformanceEnable);
// A previously stored pointer must be the same one; a different value would mean the stat was
// assigned to two different groups.
check(!OldHighPerformanceEnable || LocalHighPerformanceEnable == OldHighPerformanceEnable); // we are assigned two different groups?
return LocalHighPerformanceEnable;
}
SCOPE_CYCLE_COUNTER(STAT_MarsTest_Character_Tick)
// Declares a local FScopeCycleCounter named CycleCount_<Stat>, constructed from GET_STATID(Stat).
// The object's constructor starts timing and its destructor stops it, so the measured region is
// the rest of the enclosing scope.
# define SCOPE_CYCLE_COUNTER(Stat) \
FScopeCycleCounter CycleCount_##Stat(GET_STATID(Stat));
/**
 * Scope-bound cycle counter: measurement starts when the object is constructed and ends when
 * it is destroyed.
 */
class FScopeCycleCounter : public FCycleCounter
{
public:
	/**
	 * Starts timing the given stat for the current thread by forwarding both arguments to Start().
	 */
	FORCEINLINE_STATS FScopeCycleCounter(TStatId StatId, bool bAlways = false)
	{
		this->Start(StatId, bAlways);
	}

	/**
	 * Ends the measurement via Stop(), so the time spent is recorded for the stat.
	 */
	FORCEINLINE_STATS ~FScopeCycleCounter()
	{
		this->Stop();
	}
};
/**
 * Ends the measurement. EmittedEvent is a bit mask; for each bit that is set, the matching
 * end event is emitted, and the mask is then cleared.
 */
FORCEINLINE_STATS void Stop()
{
// End the platform named event if one was emitted.
if ( EmittedEvent & NamedEvent )
{
FPlatformMisc::EndNamedEvent();
}
# if CPUPROFILERTRACE_ENABLED
// Only compiled when CPU profiler tracing is enabled: emit the end event to the trace.
if (EmittedEvent & TraceEvent)
{
FCpuProfilerTrace::OutputEndEvent();
}
# endif
// Post a CycleScopeEnd message for this stat to the thread stats.
if(EmittedEvent & ThreadStatsEvent)
{
FThreadStats::AddMessage(StatId, EStatOperation::CycleScopeEnd);
}
// Clear all bits so nothing is reported as still pending.
EmittedEvent = 0;
}
开销时间的获取
调用各平台的高分辨率时间戳来测量时间间隔
FStatMessage
// Store the current value of FPlatformTime::Cycles(), converted to int64, as the message's value.
GetValue_int64()= int64(FPlatformTime::Cycles());
FGenericPlatformTime
FGenericPlatformTime
/**
 * Reads the time with gettimeofday(), converts it to microseconds and returns the low 32 bits
 * of that value.
 */
static FORCEINLINE uint32 Cycles()
{
	struct timeval Now;
	gettimeofday(&Now, NULL);

	const uint64 SecondsAsMicroseconds = static_cast<uint64>(Now.tv_sec) * 1000000ULL;
	const uint64 TotalMicroseconds = SecondsAsMicroseconds + static_cast<uint64>(Now.tv_usec);
	return static_cast<uint32>(TotalMicroseconds);
}
FWindowsPlatformTime
/**
 * Reads the counter with QueryPerformanceCounter and returns its value truncated to 32 bits.
 */
static FORCEINLINE uint32 Cycles()
{
	Windows::LARGE_INTEGER Counter;
	Windows::QueryPerformanceCounter(&Counter);

	const uint32 TruncatedCounter = static_cast<uint32>(Counter.QuadPart);
	return TruncatedCounter;
}
FSonyPlatformTime
/**
 * Reads sceKernelGetProcessTimeCounter(), shifts the value right by CycleShift and returns the
 * low 32 bits of the result.
 */
static FORCEINLINE uint32 Cycles()
{
	const uint64 ShiftedCounter = sceKernelGetProcessTimeCounter() >> CycleShift;
	return static_cast<uint32>(ShiftedCounter);
}
sceKernelGetProcessTimeCounter: This function returns the value of the monotonic 64-bit counter that is synchronized with the process time.
FUnixTime
/**
 * Reads CLOCK_MONOTONIC with clock_gettime(), converts the reading to microseconds and returns
 * the low 32 bits of that value.
 */
static FORCEINLINE uint32 Cycles()
{
	struct timespec Now;
	clock_gettime(CLOCK_MONOTONIC, &Now);

	const uint64 SecondsAsMicroseconds = static_cast<uint64>(Now.tv_sec) * 1000000ULL;
	const uint64 NanosAsMicroseconds = static_cast<uint64>(Now.tv_nsec) / 1000ULL;
	return static_cast<uint32>(SecondsAsMicroseconds + NanosAsMicroseconds);
}
FApplePlatformTime
/**
 * Reads mach_absolute_time() and returns the low 32 bits of the value.
 */
static FORCEINLINE uint32 Cycles()
{
	const uint64 AbsoluteTime = mach_absolute_time();
	// Narrowing to the 32-bit return type, written out explicitly.
	return static_cast<uint32>(AbsoluteTime);
}
FAndroidTime
/**
 * Reads CLOCK_MONOTONIC with clock_gettime(), converts the reading to microseconds and returns
 * the low 32 bits of that value.
 */
static FORCEINLINE uint32 Cycles()
{
	struct timespec MonotonicNow;
	clock_gettime(CLOCK_MONOTONIC, &MonotonicNow);

	const uint64 WholeSecondsUs = static_cast<uint64>(MonotonicNow.tv_sec) * 1000000ULL;
	const uint64 FractionUs = static_cast<uint64>(MonotonicNow.tv_nsec) / 1000ULL;
	const uint64 TotalUs = WholeSecondsUs + FractionUs;
	return static_cast<uint32>(TotalUs);
}
性能数据从获取到显示的全流程
活动图
UE4 Profile Mainflow Activity Diagram
类图
UE4 Profile Main Class Diagram
时序图
全流程
UE4 Profile Mainflow Full Sequence Diagram
UE4 Profile Mainflow Simplified Sequence Diagram
子流程拆解
Add FStatMessage
Handle Ticker
Set InDurationCycles by FStatMessage
Set StatData by InDurationCycles
Set InInclusiveTimeMS by StatData
Update EventGraphs
Display Profiler Event Graph
扩展阅读
UE4-程序性能优化与调试相关笔记
[UE4]Statコマンドに情報を追加しよう
StatsSystemOverview
在UE4C++中的宏
C++宏(Macro)的各种玩法
C/C++中宏/Macro的深入讲解
C/C++获取时间方法:gettimeofday()
QueryPerformanceCounter (QPC) - 获取高分辨率时间戳
QueryPerformanceFrequency function
QueryPerformanceCounter function
使用QueryPerformanceFrequency、QueryPerformanceCounter精确计时
**声明**:本文来自公众号:GameDevLearning,转载请附上原文链接及本声明。