diff --git a/binaries/data/config/default.cfg b/binaries/data/config/default.cfg index a89a933a21..109aabdf55 100644 --- a/binaries/data/config/default.cfg +++ b/binaries/data/config/default.cfg @@ -201,7 +201,13 @@ hotkey.text.move.right = "Ctrl+RightArrow" ; Move cursor to start of word to ; > PROFILER hotkey.profile.toggle = "F11" ; Enable/disable real-time profiler hotkey.profile.save = "Shift+F11" ; Save current profiler data to logs/profile.txt -hotkey.profile2.enable = "F11" ; Enable HTTP server for new profiler +hotkey.profile2.enable = "F11" ; Enable HTTP/GPU modes for new profiler + +profiler2.http.autoenable = false ; Enable HTTP server output at startup (default off for security/performance) +profiler2.gpu.autoenable = false ; Enable GPU timing at startup (default off for performance/compatibility) +profiler2.gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available +profiler2.gpu.ext.enable = true ; Allow GL_EXT_timer_query timing mode when available +profiler2.gpu.intel.enable = true ; Allow GL_INTEL_performance_queries timing mode when available ; > QUICKSAVE hotkey.quicksave = "Shift+F5" diff --git a/source/gui/GUIManager.cpp b/source/gui/GUIManager.cpp index f7befba292..2c55661233 100644 --- a/source/gui/GUIManager.cpp +++ b/source/gui/GUIManager.cpp @@ -272,7 +272,7 @@ void CGUIManager::TickObjects() void CGUIManager::Draw() { - PROFILE3("render gui"); + PROFILE3_GPU("gui"); for (PageStackType::iterator it = m_PageStack.begin(); it != m_PageStack.end(); ++it) it->gui->Draw(); diff --git a/source/gui/IGUIObject.cpp b/source/gui/IGUIObject.cpp index d09c3d6be4..12bee11dc5 100644 --- a/source/gui/IGUIObject.cpp +++ b/source/gui/IGUIObject.cpp @@ -495,6 +495,10 @@ void IGUIObject::SetScriptHandler(const CStr& Action, JSObject* Function) InReaction IGUIObject::SendEvent(EGUIMessageType type, const CStr& EventName) { + PROFILE2_EVENT("gui event"); + PROFILE2_ATTR("type: %s", EventName.c_str()); + PROFILE2_ATTR("object: %s", m_Name.c_str()); + SGUIMessage msg(type); HandleMessage(msg); diff --git a/source/gui/scripting/GuiScriptConversions.cpp b/source/gui/scripting/GuiScriptConversions.cpp index c77e39749a..7f4bd8a2c5 100644 --- a/source/gui/scripting/GuiScriptConversions.cpp +++ b/source/gui/scripting/GuiScriptConversions.cpp @@ -41,6 +41,9 @@ template<> jsval ScriptInterface::ToJSVal(JSContext* cx, SDL_Event_ case SDL_MOUSEMOTION: typeName = "mousemotion"; break; case SDL_MOUSEBUTTONDOWN: typeName = "mousebuttondown"; break; case SDL_MOUSEBUTTONUP: typeName = "mousebuttonup"; break; + case SDL_QUIT: typeName = "quit"; break; + case SDL_VIDEOEXPOSE: typeName = "videoexpose"; break; + case SDL_VIDEORESIZE: typeName = "videoresize"; break; case SDL_HOTKEYDOWN: typeName = "hotkeydown"; break; case SDL_HOTKEYUP: typeName = "hotkeyup"; break; default: typeName = "(unknown)"; break; diff --git a/source/lib/external_libraries/glext_funcs.h b/source/lib/external_libraries/glext_funcs.h index d92cd63e0e..dcb7bc88aa 100644 --- a/source/lib/external_libraries/glext_funcs.h +++ b/source/lib/external_libraries/glext_funcs.h @@ -235,6 +235,40 @@ FUNC2(void, glGetVertexAttribfvARB, glGetVertexAttribfv, "2.0", (GLuint index, G FUNC2(void, glGetVertexAttribivARB, glGetVertexAttribiv, "2.0", (GLuint index, GLenum pname, GLint *params)) FUNC2(void, glGetVertexAttribPointervARB, glGetVertexAttribPointerv, "2.0", (GLuint index, GLenum pname, void **pointer)) +// GL_ARB_occlusion_query / GL1.5: +FUNC2(void, glGenQueriesARB, glGenQueries, "1.5", (GLsizei n, GLuint *ids)) +FUNC2(void, glDeleteQueriesARB, glDeleteQueries, "1.5", (GLsizei n, const GLuint *ids)) +FUNC2(GLboolean, glIsQueryARB, glIsQuery, "1.5", (GLuint id)) +FUNC2(void, glBeginQueryARB, glBeginQuery, "1.5", (GLenum target, GLuint id)) +FUNC2(void, glEndQueryARB, glEndQuery, "1.5", (GLenum target)) +FUNC2(void, glGetQueryivARB, glGetQueryiv, "1.5", (GLenum target, GLenum pname, GLint *params)) +FUNC2(void, glGetQueryObjectivARB, glGetQueryObjectiv, "1.5", (GLuint id, GLenum pname, GLint *params)) +FUNC2(void, glGetQueryObjectuivARB, glGetQueryObjectuiv, "1.5", (GLuint id, GLenum pname, GLuint *params)) + +// GL_ARB_sync / GL3.2: +FUNC2(void, glGetInteger64v, glGetInteger64v, "3.2", (GLenum pname, GLint64 *params)) + +// GL_ARB_timer_query / GL3.3: +FUNC2(void, glQueryCounter, glQueryCounter, "3.3", (GLuint id, GLenum target)) +FUNC2(void, glGetQueryObjecti64v, glGetQueryObjecti64v, "3.3", (GLuint id, GLenum pname, GLint64 *params)) +FUNC2(void, glGetQueryObjectui64v, glGetQueryObjectui64v, "3.3", (GLuint id, GLenum pname, GLuint64 *params)) + +// GL_GREMEDY_string_marker (from gDEBugger) +FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string)) + +// GL_INTEL_performance_queries (undocumented, may be unstable, use at own risk; +// see http://zaynar.co.uk/docs/gl-intel-performance-queries.html) +FUNC(void, glGetFirstPerfQueryIdINTEL, (GLuint *queryId)) +FUNC(void, glGetNextPerfQueryIdINTEL, (GLuint prevQueryId, GLuint *queryId)) +FUNC(void, glGetPerfQueryInfoINTEL, (GLuint queryId, GLuint nameMaxLength, char *name, GLuint *counterBufferSize, GLuint *numCounters, GLuint *maxQueries, GLuint *)) +FUNC(void, glGetPerfCounterInfoINTEL, (GLuint queryId, GLuint counterId, GLuint nameMaxLength, char *name, GLuint descMaxLength, char *desc, GLuint *offset, GLuint *size, GLuint *usage, GLuint *type, GLuint64 *)) +FUNC(void, glCreatePerfQueryINTEL, (GLuint queryId, GLuint *id)) +FUNC(void, glBeginPerfQueryINTEL, (GLuint id)) +FUNC(void, glEndPerfQueryINTEL, (GLuint id)) +FUNC(void, glDeletePerfQueryINTEL, (GLuint id)) +FUNC(void, glGetPerfQueryDataINTEL, (GLuint id, GLenum requestType, GLuint maxLength, char *buffer, GLuint *length)) + + #if OS_WIN // WGL_EXT_swap_control FUNC(int, wglSwapIntervalEXT, (int)) @@ -250,7 +284,4 @@ FUNC(int, wglQueryPbufferARB, (HPBUFFERARB, int, int*)) FUNC(int, wglGetPixelFormatAttribivARB, (HDC, int, int, unsigned int, const int*, int*)) FUNC(int, wglGetPixelFormatAttribfvARB, (HDC, int, int, unsigned int, const int*, float*)) FUNC(int, wglChoosePixelFormatARB, (HDC, const int *, const float*, unsigned int, int*, unsigned int*)) - -// GL_GREMEDY_string_marker (from gDEBugger) -FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string)) #endif // OS_WIN diff --git a/source/main.cpp b/source/main.cpp index b55a6d1d7e..a5ea49d7e1 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -135,6 +135,7 @@ static InReaction MainInputHandler(const SDL_Event_* ev) } else if (hotkey == "profile2.enable") { + g_Profiler2.EnableGPU(); g_Profiler2.EnableHTTP(); return IN_HANDLED; } @@ -151,8 +152,17 @@ static void PumpEvents() PROFILE3("dispatch events"); SDL_Event_ ev; - while(SDL_PollEvent(&ev.ev)) + while (SDL_PollEvent(&ev.ev)) + { + PROFILE2("event"); + if (g_GUI) + { + std::string data = g_GUI->GetScriptInterface().StringifyJSON( + ScriptInterface::ToJSVal(g_GUI->GetScriptInterface().GetContext(), ev)); + PROFILE2_ATTR("%s", data.c_str()); + } in_dispatch_event(&ev); + } } @@ -253,6 +263,8 @@ static void Frame() { g_Profiler2.RecordFrameStart(); PROFILE2("frame"); + g_Profiler2.IncrementFrameNumber(); + PROFILE2_ATTR("%d", g_Profiler2.GetFrameNumber()); ogl_WarnIfError(); diff --git a/source/ps/CConsole.cpp b/source/ps/CConsole.cpp index b4a338de34..0123433535 100644 --- a/source/ps/CConsole.cpp +++ b/source/ps/CConsole.cpp @@ -175,7 +175,7 @@ void CConsole::Render() { if (! (m_bVisible || m_bToggle) ) return; - PROFILE3("render console"); + PROFILE3_GPU("console"); CFont font(CONSOLE_FONT); font.Bind(); diff --git a/source/ps/CLogger.cpp b/source/ps/CLogger.cpp index f9747425e0..cf6ea323ad 100644 --- a/source/ps/CLogger.cpp +++ b/source/ps/CLogger.cpp @@ -277,7 +277,7 @@ void CLogger::LogError(const wchar_t* fmt, ...) void CLogger::Render() { - PROFILE3("render logger"); + PROFILE3_GPU("logger"); CleanupRenderQueue(); diff --git a/source/ps/Game.cpp b/source/ps/Game.cpp index d9ad8aa388..1b833f62a1 100644 --- a/source/ps/Game.cpp +++ b/source/ps/Game.cpp @@ -275,7 +275,11 @@ bool CGame::Update(double deltaTime, bool doInterpolate) if (m_TurnManager->Update(deltaTime, maxTurns)) { - g_GUI->SendEventToAll("SimulationUpdate"); + { + PROFILE3("gui sim update"); + g_GUI->SendEventToAll("SimulationUpdate"); + } + GetView()->GetLOSTexture().MakeDirty(); } } diff --git a/source/ps/GameSetup/GameSetup.cpp b/source/ps/GameSetup/GameSetup.cpp index 25e6098e70..282158f018 100644 --- a/source/ps/GameSetup/GameSetup.cpp +++ b/source/ps/GameSetup/GameSetup.cpp @@ -198,6 +198,10 @@ void Render() ogl_WarnIfError(); + g_Profiler2.RecordGPUFrameStart(); + + ogl_WarnIfError(); + CStr skystring = "255 0 255"; CFG_GET_USER_VAL("skycolor", String, skystring); CColor skycol; @@ -272,7 +276,7 @@ void Render() // Draw the cursor (or set the Windows cursor, on Windows) if (g_DoRenderCursor) { - PROFILE3("render cursor"); + PROFILE3_GPU("cursor"); CStrW cursorName = g_CursorName; if (cursorName.empty()) { @@ -303,6 +307,10 @@ void Render() PROFILE2_ATTR("particles: %d", (int)g_Renderer.GetStats().m_Particles); ogl_WarnIfError(); + + g_Profiler2.RecordGPUFrameEnd(); + + ogl_WarnIfError(); } @@ -647,6 +655,8 @@ void Shutdown(int UNUSED(flags)) tex_codec_unregister_all(); + g_Profiler2.ShutdownGPU(); + TIMER_BEGIN(L"shutdown SDL"); ShutdownSDL(); TIMER_END(L"shutdown SDL"); @@ -850,6 +860,13 @@ void Init(const CmdLineArgs& args, int UNUSED(flags)) // g_ConfigDB, command line args, globals CONFIG_Init(args); + // Optionally start profiler HTTP output automatically + // (By default it's only enabled by a hotkey, for security/performance) + bool profilerHTTPEnable = false; + CFG_GET_USER_VAL("profiler2.http.autoenable", Bool, profilerHTTPEnable); + if (profilerHTTPEnable) + g_Profiler2.EnableHTTP(); + if (!g_Quickstart) g_UserReporter.Initialize(); // after config @@ -879,6 +896,13 @@ void InitGraphics(const CmdLineArgs& args, int flags) ogl_WarnIfError(); + // Optionally start profiler GPU timings automatically + // (By default it's only enabled by a hotkey, for performance/compatibility) + bool profilerGPUEnable = false; + CFG_GET_USER_VAL("profiler2.gpu.autoenable", Bool, profilerGPUEnable); + if (profilerGPUEnable) + g_Profiler2.EnableGPU(); + if(!g_Quickstart) { WriteSystemInfo(); diff --git a/source/ps/Profile.h b/source/ps/Profile.h index 9c32d4f719..5d57454b91 100644 --- a/source/ps/Profile.h +++ b/source/ps/Profile.h @@ -198,4 +198,7 @@ public: // Do both old and new profilers simultaneously (1+2=3), for convenience. #define PROFILE3(name) PROFILE(name); PROFILE2(name) +// Also do GPU +#define PROFILE3_GPU(name) PROFILE(name); PROFILE2(name); PROFILE2_GPU(name) + #endif // INCLUDED_PROFILE diff --git a/source/ps/ProfileViewer.cpp b/source/ps/ProfileViewer.cpp index 407e30f0ab..91c47f2ecf 100644 --- a/source/ps/ProfileViewer.cpp +++ b/source/ps/ProfileViewer.cpp @@ -159,7 +159,7 @@ void CProfileViewer::RenderProfile() return; } - PROFILE3("render profiler"); + PROFILE3_GPU("profile viewer"); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); diff --git a/source/ps/Profiler2.cpp b/source/ps/Profiler2.cpp index be8d661ebf..0eb1239e43 100644 --- a/source/ps/Profiler2.cpp +++ b/source/ps/Profiler2.cpp @@ -1,18 +1,23 @@ -/* Copyright (C) 2011 Wildfire Games. - * This file is part of 0 A.D. +/* Copyright (c) 2011 Wildfire Games * - * 0 A.D. is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * 0 A.D. is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with 0 A.D. If not, see . + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" @@ -21,6 +26,7 @@ #include "lib/allocators/shared_ptr.h" #include "ps/CLogger.h" +#include "ps/Profiler2GPU.h" #include "third_party/mongoose/mongoose.h" CProfiler2 g_Profiler2; @@ -29,10 +35,15 @@ CProfiler2 g_Profiler2; const u8 CProfiler2::RESYNC_MAGIC[8] = {0x11, 0x22, 0x33, 0x44, 0xf4, 0x93, 0xbe, 0x15}; CProfiler2::CProfiler2() : - m_Initialised(false), m_MgContext(NULL) + m_Initialised(false), m_FrameNumber(0), m_MgContext(NULL), m_GPU(NULL) { } +CProfiler2::~CProfiler2() +{ + ENSURE(!m_Initialised); // should have called Shutdown() explicitly +} + /** * Mongoose callback. Run in an arbitrary thread (possibly concurrently with other requests). */ @@ -133,6 +144,12 @@ void CProfiler2::Initialise() RegisterCurrentThread("main"); } +void CProfiler2::InitialiseGPU() +{ + ENSURE(!m_GPU); + m_GPU = new CProfiler2GPU(*this); +} + void CProfiler2::EnableHTTP() { ENSURE(m_Initialised); @@ -150,10 +167,24 @@ void CProfiler2::EnableHTTP() ENSURE(m_MgContext); } +void CProfiler2::EnableGPU() +{ + ENSURE(m_Initialised); + if (!m_GPU) + InitialiseGPU(); +} + +void CProfiler2::ShutdownGPU() +{ + SAFE_DELETE(m_GPU); +} + void CProfiler2::Shutdown() { ENSURE(m_Initialised); + ENSURE(!m_GPU); // must shutdown GPU before profiler + if (m_MgContext) { mg_stop(m_MgContext); @@ -167,6 +198,30 @@ void CProfiler2::Shutdown() m_Initialised = false; } +void CProfiler2::RecordGPUFrameStart() +{ + if (m_GPU) + m_GPU->FrameStart(); +} + +void CProfiler2::RecordGPUFrameEnd() +{ + if (m_GPU) + m_GPU->FrameEnd(); +} + +void CProfiler2::RecordGPURegionEnter(const char* id) +{ + if (m_GPU) + m_GPU->RegionEnter(id); +} + +void CProfiler2::RecordGPURegionLeave(const char* id) +{ + if (m_GPU) + m_GPU->RegionLeave(id); +} + /** * Called by pthreads when a registered thread is destroyed. */ @@ -174,12 +229,7 @@ void CProfiler2::TLSDtor(void* data) { ThreadStorage* storage = (ThreadStorage*)data; - CProfiler2& profiler = storage->GetProfiler(); - - { - CScopeLock lock(profiler.m_Mutex); - profiler.m_Threads.erase(std::find(profiler.m_Threads.begin(), profiler.m_Threads.end(), storage)); - } + storage->GetProfiler().RemoveThreadStorage(storage); delete (ThreadStorage*)data; } @@ -197,10 +247,21 @@ void CProfiler2::RegisterCurrentThread(const std::string& name) RecordSyncMarker(); RecordEvent("thread start"); + AddThreadStorage(storage); +} + +void CProfiler2::AddThreadStorage(ThreadStorage* storage) +{ CScopeLock lock(m_Mutex); m_Threads.push_back(storage); } +void CProfiler2::RemoveThreadStorage(ThreadStorage* storage) +{ + CScopeLock lock(m_Mutex); + m_Threads.erase(std::find(m_Threads.begin(), m_Threads.end(), storage)); +} + CProfiler2::ThreadStorage::ThreadStorage(CProfiler2& profiler, const std::string& name) : m_Profiler(profiler), m_Name(name), m_BufferPos0(0), m_BufferPos1(0), m_LastTime(timer_Time()) { @@ -256,6 +317,8 @@ void CProfiler2::ThreadStorage::RecordAttribute(const char* fmt, va_list argp) void CProfiler2::ConstructJSONOverview(std::ostream& stream) { + TIMER(L"profile2 overview"); + CScopeLock lock(m_Mutex); stream << "{\"threads\":["; @@ -275,6 +338,8 @@ void CProfiler2::ConstructJSONOverview(std::ostream& stream) template void RunBufferVisitor(const std::string& buffer, V& visitor) { + TIMER(L"profile2 visitor"); + // The buffer doesn't necessarily start at the beginning of an item // (we just grabbed it from some arbitrary point in the middle), // so scan forwards until we find a sync marker. @@ -421,24 +486,33 @@ public: const char* CProfiler2::ConstructJSONResponse(std::ostream& stream, const std::string& thread) { - CScopeLock lock(m_Mutex); + TIMER(L"profile2 query"); + + std::string buffer; - ThreadStorage* storage = NULL; - for (size_t i = 0; i < m_Threads.size(); ++i) { - if (m_Threads[i]->GetName() == thread) + TIMER(L"profile2 get buffer"); + + CScopeLock lock(m_Mutex); // lock against changes to m_Threads or deletions of ThreadStorage + + ThreadStorage* storage = NULL; + for (size_t i = 0; i < m_Threads.size(); ++i) { - storage = m_Threads[i]; - break; + if (m_Threads[i]->GetName() == thread) + { + storage = m_Threads[i]; + break; + } } + + if (!storage) + return "cannot find named thread"; + + stream << "{\"events\":[\n"; + + buffer = storage->GetBuffer(); } - if (!storage) - return "cannot find named thread"; - - stream << "{\"events\":[\n"; - - std::string buffer = storage->GetBuffer(); BufferVisitor_Dump visitor(stream); RunBufferVisitor(buffer, visitor); diff --git a/source/ps/Profiler2.h b/source/ps/Profiler2.h index 617121043a..c058d87df4 100644 --- a/source/ps/Profiler2.h +++ b/source/ps/Profiler2.h @@ -1,18 +1,23 @@ -/* Copyright (C) 2011 Wildfire Games. - * This file is part of 0 A.D. +/* Copyright (c) 2011 Wildfire Games * - * 0 A.D. is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * 0 A.D. is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with 0 A.D. If not, see . + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** @@ -80,8 +85,12 @@ struct mg_context; // Note: Lots of functions are defined inline, to hypothetically // minimise performance overhead. +class CProfiler2GPU; + class CProfiler2 { + friend class CProfiler2GPU_base; + public: // Items stored in the buffers: @@ -113,7 +122,7 @@ public: private: // TODO: what's a good size? // TODO: different threads might want different sizes - static const size_t BUFFER_SIZE = 128*1024; + static const size_t BUFFER_SIZE = 1024*1024; /** * Class instantiated in every registered thread. @@ -153,6 +162,14 @@ private: } void RecordAttribute(const char* fmt, va_list argp); + + void RecordAttributePrintf(const char* fmt, ...) + { + va_list argp; + va_start(argp, fmt); + RecordAttribute(fmt, argp); + va_end(argp); + } CProfiler2& GetProfiler() { @@ -230,6 +247,7 @@ private: public: CProfiler2(); + ~CProfiler2(); /** * Call in main thread to set up the profiler, @@ -244,6 +262,18 @@ public: */ void EnableHTTP(); + /** + * Call in main thread to enable the GPU profiling support, + * after OpenGL has been initialised. + */ + void EnableGPU(); + + /** + * Call in main thread to shut down the GPU profiling support, + * before shutting down OpenGL. + */ + void ShutdownGPU(); + /** * Call in main thread to shut everything down. * All other profiled threads should have been terminated already. @@ -272,6 +302,7 @@ public: */ void RecordFrameStart() { + ENSURE(ThreadUtil::IsMainThread()); GetThreadStorage().RecordFrameStart(GetTime()); } @@ -298,6 +329,11 @@ public: va_end(argp); } + void RecordGPUFrameStart(); + void RecordGPUFrameEnd(); + void RecordGPURegionEnter(const char* id); + void RecordGPURegionLeave(const char* id); + /** * Call in any thread to produce a JSON representation of the general * state of the application. @@ -311,7 +347,27 @@ public: */ const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread); + double GetTime() + { + return timer_Time(); + } + + int GetFrameNumber() + { + return m_FrameNumber; + } + + void IncrementFrameNumber() + { + ++m_FrameNumber; + } + + void AddThreadStorage(ThreadStorage* storage); + void RemoveThreadStorage(ThreadStorage* storage); + private: + void InitialiseGPU(); + static void TLSDtor(void* data); ThreadStorage& GetThreadStorage() @@ -321,17 +377,16 @@ private: return *storage; } - double GetTime() - { - return timer_Time(); - } - bool m_Initialised; + + int m_FrameNumber; mg_context* m_MgContext; pthread_key_t m_TLS; + CProfiler2GPU* m_GPU; + CMutex m_Mutex; std::vector m_Threads; // thread-safe; protected by m_Mutex }; @@ -356,6 +411,24 @@ private: const char* m_Name; }; +/** + * Scope-based GPU enter/leave helper. + */ +class CProfile2GPURegion +{ +public: + CProfile2GPURegion(const char* name) : m_Name(name) + { + g_Profiler2.RecordGPURegionEnter(m_Name); + } + ~CProfile2GPURegion() + { + g_Profiler2.RecordGPURegionLeave(m_Name); + } +private: + const char* m_Name; +}; + /** * Starts timing from now until the end of the current scope. * @p region is the name to associate with this region (should be @@ -365,6 +438,8 @@ private: */ #define PROFILE2(region) CProfile2Region profile2__(region) +#define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region) + /** * Record the named event at the current time. */ diff --git a/source/ps/Profiler2GPU.cpp b/source/ps/Profiler2GPU.cpp new file mode 100644 index 0000000000..74ddac9bcc --- /dev/null +++ b/source/ps/Profiler2GPU.cpp @@ -0,0 +1,841 @@ +/* Copyright (c) 2011 Wildfire Games + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "precompiled.h" + +#include "Profiler2GPU.h" + +#include "lib/ogl.h" +#include "lib/allocators/shared_ptr.h" +#include "ps/ConfigDB.h" +#include "ps/Profiler2.h" + +class CProfiler2GPU_base +{ + NONCOPYABLE(CProfiler2GPU_base); + +protected: + CProfiler2GPU_base(CProfiler2& profiler, const char* name) : + m_Profiler(profiler), m_Storage(profiler, name) + { + m_Storage.RecordSyncMarker(m_Profiler.GetTime()); + m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start"); + + m_Profiler.AddThreadStorage(&m_Storage); + } + + ~CProfiler2GPU_base() + { + m_Profiler.RemoveThreadStorage(&m_Storage); + } + + CProfiler2& m_Profiler; + CProfiler2::ThreadStorage m_Storage; +}; + +////////////////////////////////////////////////////////////////////////// + +// Base class for ARB_timer_query, EXT_timer_query +class CProfiler2GPU_timer_query : public CProfiler2GPU_base +{ +protected: + CProfiler2GPU_timer_query(CProfiler2& profiler, const char* name) : + CProfiler2GPU_base(profiler, name) + { + } + + ~CProfiler2GPU_timer_query() + { + pglDeleteQueriesARB(m_FreeQueries.size(), m_FreeQueries.data()); + ogl_WarnIfError(); + } + + // Returns a new GL query object (or a recycled old one) + GLuint NewQuery() + { + if (m_FreeQueries.empty()) + { + // Generate a batch of new queries + m_FreeQueries.resize(8); + pglGenQueriesARB(m_FreeQueries.size(), m_FreeQueries.data()); + ogl_WarnIfError(); + } + + GLuint query = m_FreeQueries.back(); + m_FreeQueries.pop_back(); + return query; + } + + std::vector m_FreeQueries; // query objects that are allocated but not currently in used +}; + +////////////////////////////////////////////////////////////////////////// + +/* + * GL_ARB_timer_query supports sync and async queries for absolute GPU + * timestamps, which lets us time regions of code relative to the CPU. + * At the start of a frame, we record the CPU time and sync GPU timestamp, + * giving the time-vs-timestamp offset. + * At each enter/leave-region event, we do an async GPU timestamp query. + * When all the queries for a frame have their results available, + * we convert their GPU timestamps into CPU times and record the data. + */ +class CProfiler2GPU_ARB_timer_query : public CProfiler2GPU_timer_query +{ + struct SEvent + { + const char* id; + GLuint query; + bool isEnter; // true if entering region; false if leaving + }; + + struct SFrame + { + u32 num; + + double syncTimeStart; // CPU time at start of maybe this frame or a recent one + GLint64 syncTimestampStart; // GL timestamp corresponding to timeStart + + std::vector events; + }; + + std::deque m_Frames; + +public: + static bool IsSupported() + { + return ogl_HaveExtension("GL_ARB_timer_query"); + } + + CProfiler2GPU_ARB_timer_query(CProfiler2& profiler) : + CProfiler2GPU_timer_query(profiler, "gpu_arb") + { + // TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's + // high enough (but apparently it might trigger GL errors on ATI) + } + + ~CProfiler2GPU_ARB_timer_query() + { + // Pop frames to return queries to the free list + while (!m_Frames.empty()) + PopFrontFrame(); + } + + void FrameStart() + { + ProcessFrames(); + + SFrame frame; + frame.num = m_Profiler.GetFrameNumber(); + + // On (at least) some NVIDIA Windows drivers, when GPU-bound, or when + // vsync enabled and not CPU-bound, the first glGet* call at the start + // of a frame appears to trigger a wait (to stop the GPU getting too + // far behind, or to wait for the vsync period). + // That will be this GL_TIMESTAMP get, which potentially distorts the + // reported results. So we'll only do it fairly rarely, and for most + // frames we'll just assume the clocks don't drift much + + const double RESYNC_PERIOD = 1.0; // seconds + + double now = m_Profiler.GetTime(); + + if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD) + { + PROFILE2("profile timestamp resync"); + + pglGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart); + ogl_WarnIfError(); + + frame.syncTimeStart = m_Profiler.GetTime(); + // (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP + // might wait a while before returning its now-current timestamp) + } + else + { + // Reuse the previous frame's sync data + frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart; + frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart; + } + + m_Frames.push_back(frame); + + RegionEnter("frame"); + } + + void FrameEnd() + { + RegionLeave("frame"); + } + + void RecordRegion(const char* id, bool isEnter) + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.back(); + + SEvent event; + event.id = id; + event.query = NewQuery(); + event.isEnter = isEnter; + + pglQueryCounter(event.query, GL_TIMESTAMP); + ogl_WarnIfError(); + + frame.events.push_back(event); + } + + void RegionEnter(const char* id) + { + RecordRegion(id, true); + } + + void RegionLeave(const char* id) + { + RecordRegion(id, false); + } + +private: + + void ProcessFrames() + { + while (!m_Frames.empty()) + { + SFrame& frame = m_Frames.front(); + + // Queries become available in order so we only need to check the last one + GLint available = 0; + pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available); + ogl_WarnIfError(); + if (!available) + break; + + // The frame's queries are now available, so retrieve and record all their results: + + for (size_t i = 0; i < frame.events.size(); ++i) + { + GLuint64 queryTimestamp = 0; + pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp); + ogl_WarnIfError(); + + // Convert to absolute CPU-clock time + double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9; + + // Record a frame-start for syncing + if (i == 0) + m_Storage.RecordFrameStart(t); + + if (frame.events[i].isEnter) + m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id); + else + m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id); + + // Associate the frame number with the "frame" region + if (i == 0) + m_Storage.RecordAttributePrintf("%d", frame.num); + } + + PopFrontFrame(); + } + } + + void PopFrontFrame() + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.front(); + for (size_t i = 0; i < frame.events.size(); ++i) + m_FreeQueries.push_back(frame.events[i].query); + m_Frames.pop_front(); + } +}; + +////////////////////////////////////////////////////////////////////////// + +/* + * GL_EXT_timer_query only supports async queries for elapsed time, + * and only a single simultaneous query. + * We can't correctly convert it to absolute time, so we just pretend + * each GPU frame starts the same time as the CPU for that frame. + * We do a query for elapsed time between every adjacent enter/leave-region event. + * When all the queries for a frame have their results available, + * we sum the elapsed times to calculate when each event occurs within the + * frame, and record the data. + */ +class CProfiler2GPU_EXT_timer_query : public CProfiler2GPU_timer_query +{ + struct SEvent + { + const char* id; + GLuint query; // query for time elapsed from this event until the next, or 0 for final event + bool isEnter; // true if entering region; false if leaving + }; + + struct SFrame + { + u32 num; + double timeStart; // CPU time at frame start + std::vector events; + }; + + std::deque m_Frames; + +public: + static bool IsSupported() + { + return ogl_HaveExtension("GL_EXT_timer_query"); + } + + CProfiler2GPU_EXT_timer_query(CProfiler2& profiler) : + CProfiler2GPU_timer_query(profiler, "gpu_ext") + { + } + + ~CProfiler2GPU_EXT_timer_query() + { + // Pop frames to return queries to the free list + while (!m_Frames.empty()) + PopFrontFrame(); + } + + void FrameStart() + { + ProcessFrames(); + + SFrame frame; + frame.num = m_Profiler.GetFrameNumber(); + frame.timeStart = m_Profiler.GetTime(); + + m_Frames.push_back(frame); + + RegionEnter("frame"); + } + + void FrameEnd() + { + RegionLeave("frame"); + + pglEndQueryARB(GL_TIME_ELAPSED); + ogl_WarnIfError(); + } + + void RecordRegion(const char* id, bool isEnter) + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.back(); + + SEvent event; + event.id = id; + event.query = NewQuery(); + event.isEnter = isEnter; + + if (!frame.events.empty()) + { + pglEndQueryARB(GL_TIME_ELAPSED); + ogl_WarnIfError(); + } + + pglBeginQueryARB(GL_TIME_ELAPSED, event.query); + ogl_WarnIfError(); + + frame.events.push_back(event); + } + + void RegionEnter(const char* id) + { + RecordRegion(id, true); + } + + void RegionLeave(const char* id) + { + RecordRegion(id, false); + } + +private: + void ProcessFrames() + { + while (!m_Frames.empty()) + { + SFrame& frame = m_Frames.front(); + + // Queries become available in order so we only need to check the last one + GLint available = 0; + pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available); + ogl_WarnIfError(); + if (!available) + break; + + // The frame's queries are now available, so retrieve and record all their results: + + double t = frame.timeStart; + m_Storage.RecordFrameStart(t); + + for (size_t i = 0; i < frame.events.size(); ++i) + { + if (frame.events[i].isEnter) + m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id); + else + m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id); + + // Associate the frame number with the "frame" region + if (i == 0) + m_Storage.RecordAttributePrintf("%d", frame.num); + + // Advance by the elapsed time to the next event + GLuint64 queryElapsed = 0; + pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryElapsed); + ogl_WarnIfError(); + t += (double)queryElapsed / 1e9; + } + + PopFrontFrame(); + } + } + + void PopFrontFrame() + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.front(); + for (size_t i = 0; i < frame.events.size(); ++i) + m_FreeQueries.push_back(frame.events[i].query); + m_Frames.pop_front(); + } +}; + +////////////////////////////////////////////////////////////////////////// + +/* + * GL_INTEL_performance_queries is not officially documented + * (see http://zaynar.co.uk/docs/gl-intel-performance-queries.html) + * but it's potentially useful so we'll support it anyway. + * It supports async queries giving elapsed time plus a load of other + * counters that we'd like to use, and supports many simultaneous queries + * (unlike GL_EXT_timer_query). + * There are multiple query types (typically 2), each with its own set of + * multiple counters. + * On each enter-region event, we start a new set of queries. + * On each leave-region event, we end the corresponding set of queries. + * We can't tell the offsets between the enter events of nested regions, + * so we pretend they all got entered at the same time. + */ +class CProfiler2GPU_INTEL_performance_queries : public CProfiler2GPU_base +{ + struct SEvent + { + const char* id; + bool isEnter; + std::vector queries; // if isEnter, one per SPerfQueryType; else empty + }; + + struct SFrame + { + u32 num; + double timeStart; // CPU time at frame start + std::vector events; + std::vector activeRegions; // stack of indexes into events + }; + + std::deque m_Frames; + + // Counters listed by the graphics driver for a particular query type + struct SPerfCounter + { + std::string name; + std::string desc; + GLuint offset; + GLuint size; + GLuint type; + }; + + // Query types listed by the graphics driver + struct SPerfQueryType + { + GLuint queryTypeId; + std::string name; + GLuint counterBufferSize; + std::vector counters; + + std::vector freeQueries; // query objects that are allocated but not currently in use + }; + + std::vector m_QueryTypes; + + #define INTEL_PERFQUERIES_NONBLOCK 0x83FA + #define INTEL_PERFQUERIES_BLOCK 0x83FB + #define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT 0x9402 + #define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64 0x9403 + #define INTEL_PERFQUERIES_TYPE_FLOAT 0x9404 + #define INTEL_PERFQUERIES_TYPE_BOOL 0x9406 + +public: + static bool IsSupported() + { + return ogl_HaveExtension("GL_INTEL_performance_queries"); + } + + CProfiler2GPU_INTEL_performance_queries(CProfiler2& profiler) : + CProfiler2GPU_base(profiler, "gpu_intel") + { + LoadPerfCounters(); + } + + ~CProfiler2GPU_INTEL_performance_queries() + { + // Pop frames to return queries to the free list + while (!m_Frames.empty()) + PopFrontFrame(); + + for (size_t i = 0; i < m_QueryTypes.size(); ++i) + for (size_t j = 0; j < m_QueryTypes[i].freeQueries.size(); ++j) + pglDeletePerfQueryINTEL(m_QueryTypes[i].freeQueries[j]); + + ogl_WarnIfError(); + } + + void FrameStart() + { + ProcessFrames(); + + SFrame frame; + frame.num = m_Profiler.GetFrameNumber(); + frame.timeStart = m_Profiler.GetTime(); + + m_Frames.push_back(frame); + + RegionEnter("frame"); + } + + void FrameEnd() + { + RegionLeave("frame"); + } + + void RegionEnter(const char* id) + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.back(); + + SEvent event; + event.id = id; + event.isEnter = true; + + for (size_t i = 0; i < m_QueryTypes.size(); ++i) + { + GLuint id = NewQuery(i); + pglBeginPerfQueryINTEL(id); + ogl_WarnIfError(); + event.queries.push_back(id); + } + + frame.activeRegions.push_back(frame.events.size()); + + frame.events.push_back(event); + } + + void RegionLeave(const char* id) + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.back(); + + ENSURE(!frame.activeRegions.empty()); + SEvent& activeEvent = frame.events[frame.activeRegions.back()]; + + for (size_t i = 0; i < m_QueryTypes.size(); ++i) + { + pglEndPerfQueryINTEL(activeEvent.queries[i]); + ogl_WarnIfError(); + } + + frame.activeRegions.pop_back(); + + SEvent event; + event.id = id; + event.isEnter = false; + frame.events.push_back(event); + } + +private: + GLuint NewQuery(size_t queryIdx) + { + ENSURE(queryIdx < m_QueryTypes.size()); + + if (m_QueryTypes[queryIdx].freeQueries.empty()) + { + GLuint id; + pglCreatePerfQueryINTEL(m_QueryTypes[queryIdx].queryTypeId, &id); + ogl_WarnIfError(); + return id; + } + + GLuint id = m_QueryTypes[queryIdx].freeQueries.back(); + m_QueryTypes[queryIdx].freeQueries.pop_back(); + return id; + } + + void ProcessFrames() + { + while (!m_Frames.empty()) + { + SFrame& frame = m_Frames.front(); + + // Queries don't become available in order, so check them all before + // trying to read the results from any + for (size_t j = 0; j < m_QueryTypes.size(); ++j) + { + size_t size = m_QueryTypes[j].counterBufferSize; + shared_ptr buf(new char[size], ArrayDeleter()); + + for (size_t i = 0; i < frame.events.size(); ++i) + { + if (!frame.events[i].isEnter) + continue; + + GLuint length = 0; + pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buf.get(), &length); + ogl_WarnIfError(); + if (length == 0) + return; + } + } + + double lastTime = frame.timeStart; + std::stack endTimes; + + m_Storage.RecordFrameStart(frame.timeStart); + + for (size_t i = 0; i < frame.events.size(); ++i) + { + if (frame.events[i].isEnter) + { + m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id); + + if (i == 0) + m_Storage.RecordAttributePrintf("%d", frame.num); + + double elapsed = 0.0; + + for (size_t j = 0; j < m_QueryTypes.size(); ++j) + { + GLuint length; + char* buf = new char[m_QueryTypes[j].counterBufferSize]; + pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buf, &length); + ogl_WarnIfError(); + ENSURE(length == m_QueryTypes[j].counterBufferSize); + + m_Storage.RecordAttributePrintf("-- %hs --", m_QueryTypes[j].name.c_str()); + + for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k) + { + SPerfCounter& counter = m_QueryTypes[j].counters[k]; + + if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT) + { + ENSURE(counter.size == 4); + GLuint value; + memcpy(&value, buf + counter.offset, counter.size); + m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value); + } + else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64) + { + ENSURE(counter.size == 8); + GLuint64 value; + memcpy(&value, buf + counter.offset, counter.size); + m_Storage.RecordAttributePrintf("%hs: %.0f", counter.name.c_str(), (double)value); + + if (counter.name == "TotalTime") + elapsed = (double)value / 1e6; + } + else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT) + { + ENSURE(counter.size == 4); + GLfloat value; + memcpy(&value, buf + counter.offset, counter.size); + m_Storage.RecordAttributePrintf("%hs: %f", counter.name.c_str(), value); + } + else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL) + { + ENSURE(counter.size == 4); + GLuint value; + memcpy(&value, buf + counter.offset, counter.size); + ENSURE(value == 0 || value == 1); + m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value); + } + else + { + debug_warn(L"unrecognised Intel performance counter type"); + } + } + + delete[] buf; + } + + endTimes.push(lastTime + elapsed); + } + else + { + lastTime = endTimes.top(); + endTimes.pop(); + m_Storage.Record(CProfiler2::ITEM_LEAVE, lastTime, frame.events[i].id); + } + } + + PopFrontFrame(); + } + } + + void PopFrontFrame() + { + ENSURE(!m_Frames.empty()); + SFrame& frame = m_Frames.front(); + for (size_t i = 0; i < frame.events.size(); ++i) + if (frame.events[i].isEnter) + for (size_t j = 0; j < m_QueryTypes.size(); ++j) + m_QueryTypes[j].freeQueries.push_back(frame.events[i].queries[j]); + m_Frames.pop_front(); + } + + void LoadPerfCounters() + { + GLuint queryTypeId; + pglGetFirstPerfQueryIdINTEL(&queryTypeId); + ogl_WarnIfError(); + do + { + char queryName[256]; + GLuint counterBufferSize, numCounters, maxQueries, unknown; + pglGetPerfQueryInfoINTEL(queryTypeId, ARRAY_SIZE(queryName), queryName, &counterBufferSize, &numCounters, &maxQueries, &unknown); + ogl_WarnIfError(); + ENSURE(unknown == 1); + + SPerfQueryType query; + query.queryTypeId = queryTypeId; + query.name = queryName; + query.counterBufferSize = counterBufferSize; + + for (GLuint counterId = 1; counterId <= numCounters; ++counterId) + { + char counterName[256]; + char counterDesc[2048]; + GLuint counterOffset, counterSize, counterUsage, counterType; + GLuint64 unknown2; + pglGetPerfCounterInfoINTEL(queryTypeId, counterId, ARRAY_SIZE(counterName), counterName, ARRAY_SIZE(counterDesc), counterDesc, &counterOffset, &counterSize, &counterUsage, &counterType, &unknown2); + ogl_WarnIfError(); + ENSURE(unknown2 == 0 || unknown2 == 1); + + SPerfCounter counter; + counter.name = counterName; + counter.desc = counterDesc; + counter.offset = counterOffset; + counter.size = counterSize; + counter.type = counterType; + query.counters.push_back(counter); + } + + m_QueryTypes.push_back(query); + + pglGetNextPerfQueryIdINTEL(queryTypeId, &queryTypeId); + ogl_WarnIfError(); + + } while (queryTypeId); + } +}; + +////////////////////////////////////////////////////////////////////////// + +CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) : + m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL) +{ + bool enabledARB = false; + bool enabledEXT = false; + bool enabledINTEL = false; + CFG_GET_USER_VAL("profiler2.gpu.arb.enable", Bool, enabledARB); + CFG_GET_USER_VAL("profiler2.gpu.ext.enable", Bool, enabledEXT); + CFG_GET_USER_VAL("profiler2.gpu.intel.enable", Bool, enabledINTEL); + + if (enabledARB && CProfiler2GPU_ARB_timer_query::IsSupported()) + { + m_ProfilerARB = new CProfiler2GPU_ARB_timer_query(m_Profiler); + } + + if (enabledEXT && CProfiler2GPU_EXT_timer_query::IsSupported()) + { + m_ProfilerEXT = new CProfiler2GPU_EXT_timer_query(m_Profiler); + } + + if (enabledINTEL && CProfiler2GPU_INTEL_performance_queries::IsSupported()) + { + m_ProfilerINTEL = new CProfiler2GPU_INTEL_performance_queries(m_Profiler); + } +} + +CProfiler2GPU::~CProfiler2GPU() +{ + SAFE_DELETE(m_ProfilerARB); + SAFE_DELETE(m_ProfilerEXT); + SAFE_DELETE(m_ProfilerINTEL); +} + +void CProfiler2GPU::FrameStart() +{ + if (m_ProfilerARB) + m_ProfilerARB->FrameStart(); + + if (m_ProfilerEXT) + m_ProfilerEXT->FrameStart(); + + if (m_ProfilerINTEL) + m_ProfilerINTEL->FrameStart(); +} + +void CProfiler2GPU::FrameEnd() +{ + if (m_ProfilerARB) + m_ProfilerARB->FrameEnd(); + + if (m_ProfilerEXT) + m_ProfilerEXT->FrameEnd(); + + if (m_ProfilerINTEL) + m_ProfilerINTEL->FrameEnd(); +} + +void CProfiler2GPU::RegionEnter(const char* id) +{ + if (m_ProfilerARB) + m_ProfilerARB->RegionEnter(id); + + if (m_ProfilerEXT) + m_ProfilerEXT->RegionEnter(id); + + if (m_ProfilerINTEL) + m_ProfilerINTEL->RegionEnter(id); +} + +void CProfiler2GPU::RegionLeave(const char* id) +{ + if (m_ProfilerARB) + m_ProfilerARB->RegionLeave(id); + + if (m_ProfilerEXT) + m_ProfilerEXT->RegionLeave(id); + + if (m_ProfilerINTEL) + m_ProfilerINTEL->RegionLeave(id); +} diff --git a/source/ps/Profiler2GPU.h b/source/ps/Profiler2GPU.h new file mode 100644 index 0000000000..a738578f09 --- /dev/null +++ b/source/ps/Profiler2GPU.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2011 Wildfire Games + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +class CProfiler2; +class CProfiler2GPU_ARB_timer_query; +class CProfiler2GPU_EXT_timer_query; +class CProfiler2GPU_INTEL_performance_queries; + +/** + * Used by CProfiler2 for GPU profiling support. + */ +class CProfiler2GPU +{ + NONCOPYABLE(CProfiler2GPU); + +public: + CProfiler2GPU(CProfiler2& profiler); + ~CProfiler2GPU(); + + void FrameStart(); + void FrameEnd(); + void RegionEnter(const char* id); + void RegionLeave(const char* id); + +private: + CProfiler2& m_Profiler; + + CProfiler2GPU_ARB_timer_query* m_ProfilerARB; + CProfiler2GPU_EXT_timer_query* m_ProfilerEXT; + CProfiler2GPU_INTEL_performance_queries* m_ProfilerINTEL; +}; diff --git a/source/ps/UserReport.cpp b/source/ps/UserReport.cpp index 23c3912901..c831e5580a 100644 --- a/source/ps/UserReport.cpp +++ b/source/ps/UserReport.cpp @@ -28,6 +28,7 @@ #include "lib/sysdep/sysdep.h" #include "ps/ConfigDB.h" #include "ps/Filesystem.h" +#include "ps/Profiler2.h" #include "ps/ThreadUtil.h" #define DEBUG_UPLOADS 0 @@ -230,6 +231,7 @@ private: static void* RunThread(void* data) { debug_SetThreadName("CUserReportWorker"); + g_Profiler2.RegisterCurrentThread("userreport"); static_cast(data)->Run(); @@ -242,8 +244,12 @@ private: // (This has to be done in the thread because it's potentially very slow) SetStatus("proxy"); std::wstring proxy; - if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK) - curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str()); + + { + PROFILE2("get proxy config"); + if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK) + curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str()); + } SetStatus("waiting"); @@ -267,9 +273,13 @@ private: * occasionally so it can check its timer. */ + g_Profiler2.RecordRegionEnter("semaphore wait"); + // Wait until the main thread wakes us up while (SDL_SemWait(m_WorkerSem) == 0) { + g_Profiler2.RecordRegionLeave("semaphore wait"); + // Handle shutdown requests as soon as possible if (GetShutdown()) return; @@ -291,6 +301,8 @@ private: return; } } + + g_Profiler2.RecordRegionLeave("semaphore wait"); } bool GetEnabled() @@ -316,6 +328,8 @@ private: bool ProcessReport() { + PROFILE2("process report"); + shared_ptr report; { diff --git a/source/renderer/OverlayRenderer.cpp b/source/renderer/OverlayRenderer.cpp index 4afbc7970b..4e4910333c 100644 --- a/source/renderer/OverlayRenderer.cpp +++ b/source/renderer/OverlayRenderer.cpp @@ -138,7 +138,7 @@ void OverlayRenderer::PrepareForRendering() void OverlayRenderer::RenderOverlaysBeforeWater() { - PROFILE3("render overlays (before)"); + PROFILE3_GPU("overlays (before)"); glDisable(GL_TEXTURE_2D); glEnable(GL_BLEND); @@ -166,7 +166,7 @@ void OverlayRenderer::RenderOverlaysBeforeWater() void OverlayRenderer::RenderOverlaysAfterWater() { - PROFILE3("render overlays (after)"); + PROFILE3_GPU("overlays (after)"); if (!m->texlines.empty()) { @@ -262,7 +262,7 @@ void OverlayRenderer::RenderOverlaysAfterWater() void OverlayRenderer::RenderForegroundOverlays(const CCamera& viewCamera) { - PROFILE3("render overlays (fg)"); + PROFILE3_GPU("overlays (fg)"); glEnable(GL_TEXTURE_2D); glEnable(GL_BLEND); diff --git a/source/renderer/Renderer.cpp b/source/renderer/Renderer.cpp index 739bf4a73a..7f3ffeee34 100644 --- a/source/renderer/Renderer.cpp +++ b/source/renderer/Renderer.cpp @@ -933,7 +933,7 @@ void CRenderer::SetClearColor(SColor4ub color) void CRenderer::RenderShadowMap() { - PROFILE3("render shadow map"); + PROFILE3_GPU("shadow map"); m->shadow->BeginRender(); @@ -978,7 +978,7 @@ void CRenderer::RenderShadowMap() void CRenderer::RenderPatches(const CFrustum* frustum) { - PROFILE3("render patches"); + PROFILE3_GPU("patches"); bool filtered = false; if (frustum) @@ -1050,7 +1050,7 @@ private: void CRenderer::RenderModels(const CFrustum* frustum) { - PROFILE3("render models"); + PROFILE3_GPU("models"); int flags = 0; if (frustum) @@ -1087,7 +1087,7 @@ void CRenderer::RenderModels(const CFrustum* frustum) void CRenderer::RenderTransparentModels(ETransparentMode transparentMode, const CFrustum* frustum) { - PROFILE3("render transparent models"); + PROFILE3_GPU("transparent models"); int flags = 0; if (frustum) @@ -1165,7 +1165,10 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane) CVector4D camPlane = normalMatrix.Transform(worldPlane); // Grab the current projection matrix from OpenGL + { + PROFILE3("get proj matrix (oblique clipping)"); // sometimes the vsync delay gets accounted here glGetFloatv(GL_PROJECTION_MATRIX, matrix); + } // Calculate the clip-space corner point opposite the clipping plane // as (sgn(camPlane.x), sgn(camPlane.y), 1, 1) and @@ -1197,7 +1200,7 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane) // RenderReflections: render the water reflections to the reflection texture SScreenRect CRenderer::RenderReflections(const CBound& scissor) { - PROFILE("render reflections"); + PROFILE3_GPU("water reflections"); WaterManager& wm = m->waterManager; @@ -1284,7 +1287,7 @@ SScreenRect CRenderer::RenderReflections(const CBound& scissor) // RenderRefractions: render the water refractions to the refraction texture SScreenRect CRenderer::RenderRefractions(const CBound &scissor) { - PROFILE("render refractions"); + PROFILE3_GPU("water refractions"); WaterManager& wm = m->waterManager; @@ -1360,7 +1363,7 @@ SScreenRect CRenderer::RenderRefractions(const CBound &scissor) void CRenderer::RenderSilhouettes() { - PROFILE3("render silhouettes"); + PROFILE3_GPU("silhouettes"); // Render silhouettes of units hidden behind terrain or occluders. // To avoid breaking the standard rendering of alpha-blended objects, this @@ -1473,7 +1476,7 @@ void CRenderer::RenderParticles() if (GetRenderPath() != RP_SHADER) return; - PROFILE3("render particles"); + PROFILE3_GPU("particles"); m->particleRenderer.RenderParticles(); @@ -1529,11 +1532,11 @@ void CRenderer::RenderSubmissions() RenderShadowMap(); } - // clear buffers - PROFILE_START("clear buffers"); - glClearColor(m_ClearColor[0],m_ClearColor[1],m_ClearColor[2],m_ClearColor[3]); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - PROFILE_END("clear buffers"); + { + PROFILE3_GPU("clear buffers"); + glClearColor(m_ClearColor[0], m_ClearColor[1], m_ClearColor[2], m_ClearColor[3]); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + } ogl_WarnIfError(); @@ -1545,6 +1548,8 @@ void CRenderer::RenderSubmissions() { SScreenRect reflectionScissor = RenderReflections(waterScissor); SScreenRect refractionScissor = RenderRefractions(waterScissor); + + PROFILE3_GPU("water scissor"); SScreenRect dirty; dirty.x1 = std::min(reflectionScissor.x1, refractionScissor.x1); dirty.y1 = std::min(reflectionScissor.y1, refractionScissor.y1); @@ -1690,7 +1695,7 @@ void CRenderer::DisplayFrustum() // Text overlay rendering void CRenderer::RenderTextOverlays() { - PROFILE("render text overlays"); + PROFILE3_GPU("text overlays"); if (m_DisplayTerrainPriorities) m->terrainRenderer->RenderPriorities(); diff --git a/source/renderer/TerrainOverlay.cpp b/source/renderer/TerrainOverlay.cpp index 84e1a40bd0..47ca02b5f6 100644 --- a/source/renderer/TerrainOverlay.cpp +++ b/source/renderer/TerrainOverlay.cpp @@ -93,7 +93,7 @@ void TerrainOverlay::RenderOverlays() if (g_TerrainOverlayList.size() == 0) return; - PROFILE3("render terrain overlays"); + PROFILE3_GPU("terrain overlays"); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); diff --git a/source/renderer/TerrainRenderer.cpp b/source/renderer/TerrainRenderer.cpp index 475635668d..444e5e8262 100644 --- a/source/renderer/TerrainRenderer.cpp +++ b/source/renderer/TerrainRenderer.cpp @@ -610,7 +610,7 @@ CBound TerrainRenderer::ScissorWater(const CMatrix3D &viewproj) // Render fancy water bool TerrainRenderer::RenderFancyWater() { - PROFILE3("render fancy water"); + PROFILE3_GPU("fancy water"); // If we're using fancy water, make sure its shader is loaded if (!m->fancyWaterShader) @@ -739,7 +739,7 @@ bool TerrainRenderer::RenderFancyWater() void TerrainRenderer::RenderSimpleWater() { - PROFILE3("render simple water"); + PROFILE3_GPU("simple water"); WaterManager* WaterMgr = g_Renderer.GetWaterManager(); CLOSTexture& losTexture = g_Game->GetView()->GetLOSTexture(); @@ -849,7 +849,7 @@ void TerrainRenderer::RenderWater() void TerrainRenderer::RenderPriorities() { - PROFILE("render priorities"); + PROFILE("priorities"); ENSURE(m->phase == Phase_Render); diff --git a/source/simulation2/components/CCmpCommandQueue.cpp b/source/simulation2/components/CCmpCommandQueue.cpp index 0a72ecbf46..f49789d7bf 100644 --- a/source/simulation2/components/CCmpCommandQueue.cpp +++ b/source/simulation2/components/CCmpCommandQueue.cpp @@ -85,6 +85,9 @@ public: { JSContext* cx = GetSimContext().GetScriptInterface().GetContext(); + PROFILE2_EVENT("post net command"); + PROFILE2_ATTR("command: %s", GetSimContext().GetScriptInterface().StringifyJSON(cmd.get(), false).c_str()); + // TODO: would be nicer to not use globals g_Game->GetTurnManager()->PostCommand(CScriptValRooted(cx, cmd)); } diff --git a/source/tools/profiler2/profiler2.html b/source/tools/profiler2/profiler2.html index c4d8c9a563..50890c7e2d 100644 --- a/source/tools/profiler2/profiler2.html +++ b/source/tools/profiler2/profiler2.html @@ -7,7 +7,8 @@ $(refresh); diff --git a/source/tools/profiler2/profiler2.js b/source/tools/profiler2/profiler2.js index 228255ca70..cbc699b48d 100644 --- a/source/tools/profiler2/profiler2.js +++ b/source/tools/profiler2/profiler2.js @@ -130,6 +130,7 @@ function update_display(range) // display_top_items(main_events, g_data.text_output); display_frames(processed_main, g_data.canvas_frames); + display_events(processed_main, g_data.canvas_frames); $(g_data.threads[0].canvas).unbind(); $(g_data.canvas_zoom).unbind(); @@ -284,21 +285,66 @@ function compute_intervals(data, range) var num_colours = 0; var events = []; - var intervals = []; - - var stack = []; - for (var i = start; i <= end; ++i) + + // Read events for the entire data period (not just start..end) + var lastWasEvent = false; + for (var i = 0; i < data.length; ++i) { if (data[i][0] == ITEM_EVENT) { events.push({'t': data[i][1], 'id': data[i][2]}); + lastWasEvent = true; + } + else if (data[i][0] == ITEM_ATTRIBUTE) + { + if (lastWasEvent) + { + if (!events[events.length-1].attrs) + events[events.length-1].attrs = []; + events[events.length-1].attrs.push(data[i][1]); + } + } + else + { + lastWasEvent = false; + } + } + + + var intervals = []; + + // Read intervals from the focused data period (start..end) + var stack = []; + var lastT = 0; + var lastWasEvent = false; + for (var i = start; i <= end; ++i) + { + if (data[i][0] == ITEM_EVENT) + { +// if (data[i][1] < lastT) +// console.log('Time went backwards: ' + (data[i][1] - lastT)); + + lastT = data[i][1]; + lastWasEvent = true; } else if (data[i][0] == ITEM_ENTER) { +// if (data[i][1] < lastT) +// console.log('Time went backwards: ' + (data[i][1] - lastT)); + stack.push({'t0': data[i][1], 'id': data[i][2]}); + + lastT = data[i][1]; + lastWasEvent = false; } else if (data[i][0] == ITEM_LEAVE) { +// if (data[i][1] < lastT) +// console.log('Time went backwards: ' + (data[i][1] - lastT)); + + lastT = data[i][1]; + lastWasEvent = false; + if (!stack.length) continue; var interval = stack.pop(); @@ -319,13 +365,7 @@ function compute_intervals(data, range) } else if (data[i][0] == ITEM_ATTRIBUTE) { - if (i > 0 && data[i-1][0] == ITEM_EVENT) - { - if (!events[events.length-1].attrs) - events[events.length-1].attrs = []; - events[events.length-1].attrs.push(data[i][1]); - } - else if (stack.length) + if (!lastWasEvent && stack.length) { if (!stack[stack.length-1].attrs) stack[stack.length-1].attrs = []; @@ -365,6 +405,9 @@ function display_frames(data, canvas) canvas._zoomData = { 'x_to_t': function(x) { return tmin + (x - xpadding) / dx; + }, + 't_to_x': function(t) { + return (t - tmin) * dx + xpadding; } }; @@ -411,6 +454,56 @@ function display_frames(data, canvas) ctx.restore(); } +function display_events(data, canvas) +{ + var ctx = canvas.getContext('2d'); + ctx.save(); + + var x_to_time = canvas._zoomData.x_to_t; + var time_to_x = canvas._zoomData.t_to_x; + + for (var i = 0; i < data.events.length; ++i) + { + var event = data.events[i]; + + if (event.id == '__framestart') + continue; + + if (event.id == 'gui event' && event.attrs && event.attrs[0] == 'type: mousemove') + continue; + + var x = time_to_x(event.t); + var y = 32; + + var x0 = x; + var x1 = x; + var y0 = y-4; + var y1 = y+4; + + ctx.strokeStyle = 'rgb(255, 0, 0)'; + ctx.beginPath(); + ctx.moveTo(x0, y0); + ctx.lineTo(x1, y1); + ctx.stroke(); + canvas._tooltips.push({ + 'x0': x0, 'x1': x1, + 'y0': y0, 'y1': y1, + 'text': function(event) { return function() { + var t = '' + event.id + '
'; + if (event.attrs) + { + event.attrs.forEach(function(attr) { + t += attr + '
'; + }); + } + return t; + }} (event) + }); + } + + ctx.restore(); +} + function display_hierarchy(main_data, data, canvas, range, zoom) { canvas._tooltips = []; @@ -483,7 +576,12 @@ function display_hierarchy(main_data, data, canvas, range, zoom) var label = interval.id; if (interval.attrs) - label += ' [...]'; + { + if (/^\d+$/.exec(interval.attrs[0])) + label += ' ' + interval.attrs[0]; + else + label += ' [...]'; + } var x0 = Math.floor(time_to_x(interval.t0)); var x1 = Math.floor(time_to_x(interval.t1)); var y0 = padding_top + interval.depth * BAR_SPACING; @@ -532,7 +630,7 @@ function display_hierarchy(main_data, data, canvas, range, zoom) ctx.moveTo(x+0.5, 0); ctx.lineTo(x+0.5, canvas.height); ctx.stroke(); - ctx.fillText('Frame [' + ((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms]', x+2, padding_top - 24); + ctx.fillText(((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms', x+2, padding_top - 24); ctx.restore(); } @@ -636,7 +734,7 @@ function set_tooltip_handlers(canvas) for (var i = 0; i < tooltips.length; ++i) { var t = tooltips[i]; - if (t.x0 <= relativeX && relativeX <= t.x1 && t.y0 <= relativeY && relativeY <= t.y1) + if (t.x0-1 <= relativeX && relativeX <= t.x1+1 && t.y0 <= relativeY && relativeY <= t.y1) { text = t.text(); break; @@ -644,6 +742,10 @@ function set_tooltip_handlers(canvas) } if (text) { + if (text.length > 512) + $('#tooltip').addClass('long'); + else + $('#tooltip').removeClass('long'); $('#tooltip').css('left', (event.pageX+16)+'px'); $('#tooltip').css('top', (event.pageY+8)+'px'); $('#tooltip').html(text);