Adds compute shaders support and scaling with FSR.

Fixes #6842

Comments By: phosit, Stan
Differential Revision: https://code.wildfiregames.com/D5218
This was SVN commit r28010.
This commit is contained in:
vladislavbelov
2024-01-17 19:40:27 +00:00
parent f9f798158a
commit e3f46bb809
63 changed files with 5337 additions and 118 deletions
+281 -19
View File
@@ -1,4 +1,4 @@
/* Copyright (C) 2023 Wildfire Games.
/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -152,8 +152,7 @@ void CPostprocManager::Initialize()
[maxSamples](const uint32_t sampleCount) { return sampleCount <= maxSamples; } );
// The screen size starts out correct and then must be updated with Resize()
m_Width = g_Renderer.GetWidth();
m_Height = g_Renderer.GetHeight();
RecalculateSize(g_Renderer.GetWidth(), g_Renderer.GetHeight());
RecreateBuffers();
m_IsInitialized = true;
@@ -162,15 +161,20 @@ void CPostprocManager::Initialize()
UpdateAntiAliasingTechnique();
UpdateSharpeningTechnique();
UpdateSharpnessFactor();
CStr upscaleName;
CFG_GET_VAL("renderer.upscale.technique", upscaleName);
SetUpscaleTechnique(upscaleName);
// This might happen after the map is loaded and the effect chosen
SetPostEffect(m_PostProcEffect);
if (m_Device->GetCapabilities().computeShaders)
m_DownscaleComputeTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern("compute_downscale"));
}
void CPostprocManager::Resize()
{
m_Width = g_Renderer.GetWidth();
m_Height = g_Renderer.GetHeight();
RecalculateSize(g_Renderer.GetWidth(), g_Renderer.GetHeight());
// If the buffers were intialized, recreate them to the new size.
if (m_IsInitialized)
@@ -197,6 +201,42 @@ void CPostprocManager::RecreateBuffers()
GEN_BUFFER_RGBA(m_ColorTex1, m_Width, m_Height);
GEN_BUFFER_RGBA(m_ColorTex2, m_Width, m_Height);
if (m_UnscaledWidth != m_Width && m_Device->GetCapabilities().computeShaders)
{
const uint32_t usage =
Renderer::Backend::ITexture::Usage::TRANSFER_SRC |
Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT |
Renderer::Backend::ITexture::Usage::SAMPLED |
Renderer::Backend::ITexture::Usage::STORAGE;
m_UnscaledTexture1 = m_Device->CreateTexture2D(
"PostProcUnscaledTexture1", usage,
Renderer::Backend::Format::R8G8B8A8_UNORM,
m_UnscaledWidth, m_UnscaledHeight,
Renderer::Backend::Sampler::MakeDefaultSampler(
Renderer::Backend::Sampler::Filter::LINEAR,
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
m_UnscaledTexture2 = m_Device->CreateTexture2D(
"PostProcUnscaledTexture2", usage,
Renderer::Backend::Format::R8G8B8A8_UNORM, m_UnscaledWidth, m_UnscaledHeight,
Renderer::Backend::Sampler::MakeDefaultSampler(
Renderer::Backend::Sampler::Filter::LINEAR,
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
Renderer::Backend::SColorAttachment colorAttachment{};
colorAttachment.clearColor = CColor{0.0f, 0.0f, 0.0f, 0.0f};
colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::LOAD;
colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
colorAttachment.texture = m_UnscaledTexture1.get();
m_UnscaledFramebuffer1 = m_Device->CreateFramebuffer("PostprocUnscaledFramebuffer1",
&colorAttachment, nullptr);
colorAttachment.texture = m_UnscaledTexture2.get();
m_UnscaledFramebuffer2 = m_Device->CreateFramebuffer("PostprocUnscaledFramebuffer2",
&colorAttachment, nullptr);
}
// Textures for several blur sizes. It would be possible to reuse
// m_BlurTex2b, thus avoiding the need for m_BlurTex4b and m_BlurTex8b, though given
// that these are fairly small it's probably not worth complicating the coordinates passed
@@ -386,13 +426,129 @@ Renderer::Backend::IFramebuffer* CPostprocManager::PrepareAndGetOutputFramebuffe
{
ENSURE(m_IsInitialized);
// Leaves m_PingFbo selected for rendering; m_WhichBuffer stays true at this point.
// Leaves m_PingFramebuffer selected for rendering; m_WhichBuffer stays true at this point.
m_WhichBuffer = true;
return m_UsingMultisampleBuffer ? m_MultisampleFramebuffer.get() : m_CaptureFramebuffer.get();
}
void CPostprocManager::UpscaleTextureByCompute(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
CShaderTechnique* shaderTechnique,
Renderer::Backend::ITexture* source,
Renderer::Backend::ITexture* destination)
{
Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
const std::array<float, 4> screenSize{{
static_cast<float>(m_Width), static_cast<float>(m_Height),
static_cast<float>(m_UnscaledWidth), static_cast<float>(m_UnscaledHeight)}};
constexpr uint32_t threadGroupWorkRegionDim = 16;
const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
deviceCommandContext->BeginComputePass();
deviceCommandContext->SetComputePipelineState(
shaderTechnique->GetComputePipelineState());
deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
deviceCommandContext->SetStorageTexture(shaderProgram->GetBindingSlot(str_outTex), destination);
deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
deviceCommandContext->EndComputePass();
}
void CPostprocManager::UpscaleTextureByFullscreenQuad(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
CShaderTechnique* shaderTechnique,
Renderer::Backend::ITexture* source,
Renderer::Backend::IFramebuffer* destination)
{
Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
const std::array<float, 4> screenSize{{
static_cast<float>(m_Width), static_cast<float>(m_Height),
static_cast<float>(m_UnscaledWidth), static_cast<float>(m_UnscaledHeight)}};
deviceCommandContext->BeginFramebufferPass(destination);
Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
viewportRect.width = destination->GetWidth();
viewportRect.height = destination->GetHeight();
deviceCommandContext->SetViewports(1, &viewportRect);
deviceCommandContext->SetGraphicsPipelineState(
shaderTechnique->GetGraphicsPipelineState());
deviceCommandContext->BeginPass();
deviceCommandContext->SetTexture(
shaderProgram->GetBindingSlot(str_inTex), source);
deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
DrawFullscreenQuad(m_VertexInputLayout, deviceCommandContext);
deviceCommandContext->EndPass();
deviceCommandContext->EndFramebufferPass();
}
void CPostprocManager::ApplySharpnessAfterScale(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
CShaderTechnique* shaderTechnique,
Renderer::Backend::ITexture* source,
Renderer::Backend::ITexture* destination)
{
Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
// Recommended sharpness for RCAS.
constexpr float sharpness = 0.2f;
const std::array<float, 4> screenSize{ {
static_cast<float>(m_Width), static_cast<float>(m_Height),
static_cast<float>(m_UnscaledWidth), static_cast<float>(m_UnscaledHeight)} };
constexpr uint32_t threadGroupWorkRegionDim = 16;
const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
deviceCommandContext->BeginComputePass();
deviceCommandContext->SetComputePipelineState(
shaderTechnique->GetComputePipelineState());
deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_sharpness), sharpness);
deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
deviceCommandContext->SetStorageTexture(
shaderProgram->GetBindingSlot(str_outTex), destination);
deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
deviceCommandContext->EndComputePass();
}
void CPostprocManager::DownscaleTextureByCompute(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
CShaderTechnique* shaderTechnique,
Renderer::Backend::ITexture* source,
Renderer::Backend::ITexture* destination)
{
Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
const std::array<float, 4> screenSize{{
static_cast<float>(m_Width), static_cast<float>(m_Height),
static_cast<float>(m_UnscaledWidth), static_cast<float>(m_UnscaledHeight)}};
constexpr uint32_t threadGroupWorkRegionDim = 8;
const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
deviceCommandContext->BeginComputePass();
deviceCommandContext->SetComputePipelineState(
shaderTechnique->GetComputePipelineState());
deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
deviceCommandContext->SetStorageTexture(shaderProgram->GetBindingSlot(str_outTex), destination);
deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
deviceCommandContext->EndComputePass();
}
void CPostprocManager::BlitOutputFramebuffer(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
Renderer::Backend::IFramebuffer* destination)
@@ -401,24 +557,81 @@ void CPostprocManager::BlitOutputFramebuffer(
GPU_SCOPED_LABEL(deviceCommandContext, "Copy postproc to backbuffer");
Renderer::Backend::IFramebuffer* source =
(m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
Renderer::Backend::ITexture* previousTexture =
(m_WhichBuffer ? m_ColorTex1 : m_ColorTex2).get();
// We blit to the backbuffer from the previous active buffer.
// We'll have upscaling/downscaling separately.
Renderer::Backend::IDeviceCommandContext::Rect region{};
region.width = std::min(source->GetWidth(), destination->GetWidth());
region.height = std::min(source->GetHeight(), destination->GetHeight());
deviceCommandContext->BlitFramebuffer(
source, destination, region, region,
Renderer::Backend::Sampler::Filter::NEAREST);
if (ShouldUpscale())
{
if (m_UpscaleComputeTech)
{
Renderer::Backend::ITexture* unscaledTexture = m_RCASComputeTech ? m_UnscaledTexture1.get() : m_UnscaledTexture2.get();
UpscaleTextureByCompute(deviceCommandContext, m_UpscaleComputeTech.get(), previousTexture, unscaledTexture);
if (m_RCASComputeTech)
ApplySharpnessAfterScale(deviceCommandContext, m_RCASComputeTech.get(), m_UnscaledTexture1.get(), m_UnscaledTexture2.get());
Renderer::Backend::IDeviceCommandContext::Rect sourceRegion{}, destinationRegion{};
sourceRegion.width = m_UnscaledTexture2->GetWidth();
sourceRegion.height = m_UnscaledTexture2->GetHeight();
destinationRegion.width = destination->GetWidth();
destinationRegion.height = destination->GetHeight();
deviceCommandContext->BlitFramebuffer(
m_UnscaledFramebuffer2.get(), destination, sourceRegion, destinationRegion,
Renderer::Backend::Sampler::Filter::NEAREST);
}
else
{
UpscaleTextureByFullscreenQuad(deviceCommandContext, m_UpscaleTech.get(), previousTexture, destination);
}
}
else if (ShouldDownscale())
{
Renderer::Backend::IDeviceCommandContext::Rect sourceRegion{};
Renderer::Backend::Sampler::Filter samplerFilter{
Renderer::Backend::Sampler::Filter::NEAREST};
Renderer::Backend::IFramebuffer* source{nullptr};
if (m_DownscaleComputeTech)
{
DownscaleTextureByCompute(deviceCommandContext, m_DownscaleComputeTech.get(), previousTexture, m_UnscaledTexture1.get());
source = m_UnscaledFramebuffer1.get();
sourceRegion.width = m_UnscaledTexture1->GetWidth();
sourceRegion.height = m_UnscaledTexture1->GetHeight();
}
else
{
source = (m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
sourceRegion.width = source->GetWidth();
sourceRegion.height = source->GetHeight();
samplerFilter = Renderer::Backend::Sampler::Filter::LINEAR;
}
Renderer::Backend::IDeviceCommandContext::Rect destinationRegion{};
destinationRegion.width = destination->GetWidth();
destinationRegion.height = destination->GetHeight();
deviceCommandContext->BlitFramebuffer(
source, destination, sourceRegion, destinationRegion, samplerFilter);
}
else
{
Renderer::Backend::IFramebuffer* source =
(m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
// We blit to the backbuffer from the previous active buffer.
Renderer::Backend::IDeviceCommandContext::Rect region{};
region.width = std::min(source->GetWidth(), destination->GetWidth());
region.height = std::min(source->GetHeight(), destination->GetHeight());
deviceCommandContext->BlitFramebuffer(
source, destination, region, region,
Renderer::Backend::Sampler::Filter::NEAREST);
}
}
void CPostprocManager::ApplyEffect(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
const CShaderTechniquePtr& shaderTech, int pass)
{
// select the other FBO for rendering
// Select the other framebuffer for rendering.
Renderer::Backend::IFramebuffer* framebuffer =
(m_WhichBuffer ? m_PongFramebuffer : m_PingFramebuffer).get();
deviceCommandContext->BeginFramebufferPass(framebuffer);
@@ -433,7 +646,7 @@ void CPostprocManager::ApplyEffect(
deviceCommandContext->BeginPass();
Renderer::Backend::IShaderProgram* shader = shaderTech->GetShader(pass);
// Use the textures from the current FBO as input to the shader.
// Use the textures from the current framebuffer as input to the shader.
// We also bind a bunch of other textures and parameters, but since
// this only happens once per frame the overhead is negligible.
deviceCommandContext->SetTexture(
@@ -499,7 +712,7 @@ void CPostprocManager::ApplyPostproc(
ApplyEffect(deviceCommandContext, m_AATech, pass);
}
if (hasSharp)
if (hasSharp && !ShouldUpscale())
{
for (int pass = 0; pass < m_SharpTech->GetNumPasses(); ++pass)
ApplyEffect(deviceCommandContext, m_SharpTech, pass);
@@ -617,6 +830,26 @@ void CPostprocManager::UpdateSharpnessFactor()
CFG_GET_VAL("sharpness", m_Sharpness);
}
void CPostprocManager::SetUpscaleTechnique(const CStr& upscaleName)
{
m_UpscaleTech.reset();
m_UpscaleComputeTech.reset();
m_RCASComputeTech.reset();
if (m_Device->GetCapabilities().computeShaders && upscaleName == "fsr")
{
m_UpscaleComputeTech = g_Renderer.GetShaderManager().LoadEffect(str_compute_upscale_fsr);
m_RCASComputeTech = g_Renderer.GetShaderManager().LoadEffect(str_compute_rcas);
}
else if (upscaleName == "pixelated")
{
m_UpscaleTech = g_Renderer.GetShaderManager().LoadEffect(str_upscale_nearest);
}
else
{
m_UpscaleTech = g_Renderer.GetShaderManager().LoadEffect(str_upscale_bilinear);
}
}
void CPostprocManager::SetDepthBufferClipPlanes(float nearPlane, float farPlane)
{
m_NearPlane = nearPlane;
@@ -695,3 +928,32 @@ void CPostprocManager::ResolveMultisampleFramebuffer(
deviceCommandContext->ResolveFramebuffer(
m_MultisampleFramebuffer.get(), m_PingFramebuffer.get());
}
void CPostprocManager::RecalculateSize(const uint32_t width, const uint32_t height)
{
if (m_Device->GetBackend() == Renderer::Backend::Backend::GL_ARB)
{
m_Scale = 1.0f;
return;
}
CFG_GET_VAL("renderer.scale", m_Scale);
if (m_Scale < 0.25f || m_Scale > 2.0f)
{
LOGWARNING("Invalid renderer scale: %0.2f", m_Scale);
m_Scale = 1.0f;
}
m_UnscaledWidth = width;
m_UnscaledHeight = height;
m_Width = m_UnscaledWidth * m_Scale;
m_Height = m_UnscaledHeight * m_Scale;
}
bool CPostprocManager::ShouldUpscale() const
{
return m_Width < m_UnscaledWidth;
}
bool CPostprocManager::ShouldDownscale() const
{
return m_Width > m_UnscaledWidth;
}