blob: 75bde2c66b9d4c13f27bda93838be070ccd63286 [file] [log] [blame]
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtQuick module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qsgd3d12engine_p.h"
#include "qsgd3d12engine_p_p.h"
#include "cs_mipmapgen.hlslh"
#include <QString>
#include <QColor>
#include <QLoggingCategory>
#include <qmath.h>
#include <qalgorithms.h>
// Comment out to disable DeviceLossTester functionality in order to reduce
// code size and improve startup perf a tiny bit.
#define DEVLOSS_TEST
#ifdef DEVLOSS_TEST
#include "cs_tdr.hlslh"
#endif
#ifdef Q_OS_WINRT
#include <QtCore/private/qeventdispatcher_winrt_p.h>
#include <functional>
#include <windows.ui.xaml.h>
#include <windows.ui.xaml.media.dxinterop.h>
#endif
#include <comdef.h>
QT_BEGIN_NAMESPACE
// NOTE: Avoid categorized logging. It is slow.
// Defines a static helper named debug_<variable>() that reports whether the
// QSG_RENDERER_DEBUG environment variable contains the given name. The
// environment is consulted only on the first call; the result is kept in a
// function-local static afterwards.
#define DECLARE_DEBUG_VAR(variable) \
static bool debug_ ## variable() \
{ static bool value = qgetenv("QSG_RENDERER_DEBUG").contains(QT_STRINGIFY(variable)); return value; }
DECLARE_DEBUG_VAR(render)
DECLARE_DEBUG_VAR(descheap)
DECLARE_DEBUG_VAR(buffer)
DECLARE_DEBUG_VAR(texture)
// Except for system info on startup.
Q_LOGGING_CATEGORY(QSG_LOG_INFO_GENERAL, "qt.scenegraph.general")
// Any changes to the defaults below must be reflected in adaptations.qdoc as
// well and proven by qmlbench or similar.
// Swap chain buffer count used when QT_D3D_BUFFER_COUNT yields a value below 2.
static const int DEFAULT_SWAP_CHAIN_BUFFER_COUNT = 3;
// Frame-in-flight count used when QT_D3D_FRAME_COUNT yields a value below 1.
static const int DEFAULT_FRAME_IN_FLIGHT_COUNT = 2;
// Used when QT_D3D_WAITABLE_SWAP_CHAIN_MAX_LATENCY is not set. A value of 0
// means the frame latency waitable object is not requested.
static const int DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY = 0;
static const int MAX_DRAW_CALLS_PER_LIST = 4096;
// Maximum cost of the root signature cache (see rootSigCache.setMaxCost()).
static const int MAX_CACHED_ROOTSIG = 16;
// Maximum cost of the pipeline state cache (see psoCache.setMaxCost()).
static const int MAX_CACHED_PSO = 64;
// Descriptor count of each per-frame shader-visible CBV/SRV/UAV heap.
static const int GPU_CBVSRVUAV_DESCRIPTORS = 512;
// Color format of the swap chain buffers and offscreen color buffers.
static const DXGI_FORMAT RT_COLOR_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM;
static const int BUCKETS_PER_HEAP = 8; // must match freeMap
static const int DESCRIPTORS_PER_BUCKET = 32; // the bit map (freeMap) is quint32
// Number of descriptors in every non-shader-visible heap created by
// QSGD3D12CPUDescriptorHeapManager.
static const int MAX_DESCRIPTORS_PER_HEAP = BUCKETS_PER_HEAP * DESCRIPTORS_PER_BUCKET;
// Builds a human readable description of a COM error code: the HRESULT in
// hexadecimal, followed by the system-provided message text when one exists.
static QString comErrorMessage(HRESULT hr)
{
#ifndef Q_OS_WINRT
    const _com_error comError(hr);
#else
    const _com_error comError(hr, nullptr);
#endif
    QString text = QLatin1String("Error 0x") + QString::number(ulong(hr), 16);
    const wchar_t *description = comError.ErrorMessage();
    if (description)
        text += QLatin1String(": ") + QString::fromWCharArray(description);
    return text;
}
// Reserves one CPU descriptor handle of the requested heap type.
//
// Existing heaps of that type are searched for a free slot first (a set bit
// in freeMap marks a free descriptor). When none is available a new
// non-shader-visible heap with MAX_DESCRIPTORS_PER_HEAP entries is created
// and its first descriptor is returned.
//
// Returns a zero-initialized handle when creating a new heap fails.
D3D12_CPU_DESCRIPTOR_HANDLE QSGD3D12CPUDescriptorHeapManager::allocate(D3D12_DESCRIPTOR_HEAP_TYPE type)
{
    D3D12_CPU_DESCRIPTOR_HANDLE handle = {};

    for (Heap &candidate : m_heaps) {
        if (candidate.type != type)
            continue;
        for (int bucketIdx = 0; bucketIdx < _countof(candidate.freeMap); ++bucketIdx) {
            auto &bits = candidate.freeMap[bucketIdx];
            if (!bits)
                continue; // bucket fully occupied
            // The lowest set bit is the first free slot; clear it to mark it taken.
            uint slot = qCountTrailingZeroBits(bits);
            bits &= ~(1UL << slot);
            if (Q_UNLIKELY(debug_descheap()))
                qDebug("descriptor handle heap %p type %x reserve in bucket %d index %d", &candidate, type, bucketIdx, slot);
            slot += bucketIdx * DESCRIPTORS_PER_BUCKET;
            handle = candidate.start;
            handle.ptr += slot * candidate.handleSize;
            return handle;
        }
    }

    // No free slot anywhere: set up a brand new heap for this type.
    Heap fresh;
    fresh.type = type;
    fresh.handleSize = m_handleSizes[type];

    D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
    heapDesc.Type = type;
    heapDesc.NumDescriptors = MAX_DESCRIPTORS_PER_HEAP;
    // The heaps created here are _never_ shader-visible.

    const HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&fresh.heap));
    if (FAILED(hr)) {
        qWarning("Failed to create heap with type 0x%x: %s",
                 type, qPrintable(comErrorMessage(hr)));
        return handle;
    }

    fresh.start = fresh.heap->GetCPUDescriptorHandleForHeapStart();
    if (Q_UNLIKELY(debug_descheap()))
        qDebug("new descriptor heap, type %x, start %llu", type, fresh.start.ptr);

    // Everything is free except the very first descriptor, which is handed
    // out to the caller right away.
    for (int i = 1; i < _countof(fresh.freeMap); ++i)
        fresh.freeMap[i] = 0xFFFFFFFF;
    fresh.freeMap[0] = 0xFFFFFFFE;

    handle = fresh.start;
    m_heaps.append(fresh);
    return handle;
}
// Returns a descriptor handle previously obtained from allocate() to its
// heap by setting the matching bit in the heap's free map again. A warning
// is printed when the handle does not fall inside any tracked heap of the
// given type.
void QSGD3D12CPUDescriptorHeapManager::release(D3D12_CPU_DESCRIPTOR_HANDLE handle, D3D12_DESCRIPTOR_HEAP_TYPE type)
{
    for (Heap &candidate : m_heaps) {
        if (candidate.type != type)
            continue;
        if (handle.ptr < candidate.start.ptr)
            continue;
        if (!(handle.ptr < candidate.start.ptr + candidate.handleSize * MAX_DESCRIPTORS_PER_HEAP))
            continue;

        // The handle belongs to this heap; translate it to bucket + bit.
        unsigned long slot = (handle.ptr - candidate.start.ptr) / candidate.handleSize;
        const int bucket = slot / DESCRIPTORS_PER_BUCKET;
        const int indexInBucket = slot % DESCRIPTORS_PER_BUCKET;
        candidate.freeMap[bucket] |= 1UL << indexInBucket;
        if (Q_UNLIKELY(debug_descheap()))
            qDebug("free descriptor handle heap %p type %x bucket %d index %d", &candidate, type, bucket, indexInBucket);
        return;
    }

    qWarning("QSGD3D12CPUDescriptorHeapManager: Attempted to release untracked descriptor handle %llu of type %d", handle.ptr, type);
}
// Stores the device and caches the descriptor handle increment size for
// every descriptor heap type, so allocate() does not have to query it.
void QSGD3D12CPUDescriptorHeapManager::initialize(ID3D12Device *device)
{
    m_device = device;

    int heapType = 0;
    while (heapType < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES) {
        m_handleSizes[heapType] = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE(heapType));
        ++heapType;
    }
}
// Drops every descriptor heap tracked by the manager and forgets the device.
void QSGD3D12CPUDescriptorHeapManager::releaseResources()
{
    for (auto it = m_heaps.begin(); it != m_heaps.end(); ++it)
        it->heap = nullptr;
    m_heaps.clear();

    m_device = nullptr;
}
// One device per process, one everything else (engine) per window.
Q_GLOBAL_STATIC(QSGD3D12DeviceManager, deviceManager)
// Picks the DXGI adapter to create the D3D12 device on.
//
// All adapters are logged first. If QT_D3D_ADAPTER_INDEX is set and a
// feature level 11.0 device can be created on that adapter, it is chosen.
// Otherwise the first non-software adapter that supports feature level 11.0
// is used.
//
// \a factory is the DXGI factory to enumerate. \a outAdapter receives the
// chosen adapter (the caller owns the reference), or nullptr when no
// suitable adapter was found.
static void getHardwareAdapter(IDXGIFactory1 *factory, IDXGIAdapter1 **outAdapter)
{
    const D3D_FEATURE_LEVEL fl = D3D_FEATURE_LEVEL_11_0;
    ComPtr<IDXGIAdapter1> adapter;
    DXGI_ADAPTER_DESC1 desc;

    // Enumerate until the first failure of any kind. Comparing against
    // DXGI_ERROR_NOT_FOUND only would dereference a null adapter (and never
    // terminate) should the call fail for a different reason.
    for (int adapterIndex = 0; SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) {
        adapter->GetDesc1(&desc);
        const QString name = QString::fromUtf16((char16_t *) desc.Description);
        qCDebug(QSG_LOG_INFO_GENERAL, "Adapter %d: '%s' (flags 0x%x)", adapterIndex, qPrintable(name), desc.Flags);
    }

    if (qEnvironmentVariableIsSet("QT_D3D_ADAPTER_INDEX")) {
        const int adapterIndex = qEnvironmentVariableIntValue("QT_D3D_ADAPTER_INDEX");
        if (SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter))) {
            adapter->GetDesc1(&desc);
            const QString name = QString::fromUtf16((char16_t *) desc.Description);
            // Passing nullptr for the device only tests whether creation would succeed.
            HRESULT hr = D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr);
            if (SUCCEEDED(hr)) {
                qCDebug(QSG_LOG_INFO_GENERAL, "Using requested adapter '%s'", qPrintable(name));
                *outAdapter = adapter.Detach();
                return;
            } else {
                qWarning("Failed to create device for requested adapter '%s': %s",
                         qPrintable(name), qPrintable(comErrorMessage(hr)));
            }
        }
    }

    // Fall back to the first hardware adapter that can do feature level 11.0.
    // When the enumeration runs out, 'adapter' is left empty and nullptr is
    // reported to the caller.
    for (int adapterIndex = 0; SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) {
        adapter->GetDesc1(&desc);
        if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
            continue;

        if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr))) {
            const QString name = QString::fromUtf16((char16_t *) desc.Description);
            qCDebug(QSG_LOG_INFO_GENERAL, "Using adapter '%s'", qPrintable(name));
            break;
        }
    }

    *outAdapter = adapter.Detach();
}
// Takes a reference on the shared device, creating it first if needed, and
// returns it. ensureCreated() may return without having created a device; in
// that case the reference count is still incremented and the returned
// pointer is null.
ID3D12Device *QSGD3D12DeviceManager::ref()
{
ensureCreated();
m_ref.ref();
return m_device.Get();
}
// Drops one reference on the shared device. Once the last reference is gone
// both the device and the DXGI factory are released.
void QSGD3D12DeviceManager::unref()
{
    const bool stillReferenced = m_ref.deref();
    if (stillReferenced)
        return;

    if (Q_UNLIKELY(debug_render()))
        qDebug("destroying d3d device");

    m_device = nullptr;
    m_factory = nullptr;
}
void QSGD3D12DeviceManager::deviceLossDetected()
{
for (DeviceLossObserver *observer : qAsConst(m_observers))
observer->deviceLost();
// Nothing else to do here. All windows are expected to release their
// resources and call unref() in response immediately.
}
// Returns the DXGI factory, running ensureCreated() first. No reference is
// taken on the device manager by this call.
IDXGIFactory4 *QSGD3D12DeviceManager::dxgi()
{
ensureCreated();
return m_factory.Get();
}
void QSGD3D12DeviceManager::ensureCreated()
{
if (m_device)
return;
HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(&m_factory));
if (FAILED(hr)) {
qWarning("Failed to create DXGI: %s", qPrintable(comErrorMessage(hr)));
return;
}
ComPtr<IDXGIAdapter1> adapter;
getHardwareAdapter(m_factory.Get(), &adapter);
bool warp = true;
if (adapter) {
HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device));
if (SUCCEEDED(hr))
warp = false;
else
qWarning("Failed to create device: %s", qPrintable(comErrorMessage(hr)));
}
if (warp) {
qCDebug(QSG_LOG_INFO_GENERAL, "Using WARP");
m_factory->EnumWarpAdapter(IID_PPV_ARGS(&adapter));
HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device));
if (FAILED(hr)) {
qWarning("Failed to create WARP device: %s", qPrintable(comErrorMessage(hr)));
return;
}
}
ComPtr<IDXGIAdapter3> adapter3;
if (SUCCEEDED(adapter.As(&adapter3))) {
DXGI_QUERY_VIDEO_MEMORY_INFO vidMemInfo;
if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &vidMemInfo))) {
qCDebug(QSG_LOG_INFO_GENERAL, "Video memory info: LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB",
vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024,
vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024);
}
if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &vidMemInfo))) {
qCDebug(QSG_LOG_INFO_GENERAL, "Video memory info: NON-LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB",
vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024,
vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024);
}
}
}
// Adds an observer to be notified on device loss. Registering the same
// observer more than once has no effect.
void QSGD3D12DeviceManager::registerDeviceLossObserver(DeviceLossObserver *observer)
{
    if (m_observers.contains(observer))
        return;
    m_observers.append(observer);
}
// Creates the private implementation object. Nothing is initialized until
// attachToWindow() is called.
QSGD3D12Engine::QSGD3D12Engine()
{
d = new QSGD3D12EnginePrivate;
}
// Waits for the GPU, releases all resources and then destroys the private
// implementation object.
QSGD3D12Engine::~QSGD3D12Engine()
{
d->waitGPU();
d->releaseResources();
delete d;
}
// Initializes the engine for the given window. Returns true when the engine
// ends up initialized; returns false (with a warning) when the engine is
// already active, or when initialization did not complete.
bool QSGD3D12Engine::attachToWindow(WId window, const QSize &size, float dpr, int surfaceFormatSamples, bool alpha)
{
    if (!d->isInitialized()) {
        d->initialize(window, size, dpr, surfaceFormatSamples, alpha);
        return d->isInitialized();
    }

    qWarning("QSGD3D12Engine: Cannot attach active engine to window");
    return false;
}
// Forwards to QSGD3D12EnginePrivate::releaseResources().
void QSGD3D12Engine::releaseResources()
{
d->releaseResources();
}
// Reports whether the engine is currently initialized.
bool QSGD3D12Engine::hasResources() const
{
// An explicit releaseResources() or a device loss results in initialized == false.
return d->isInitialized();
}
// Forwards the new window size and device pixel ratio to
// QSGD3D12EnginePrivate::setWindowSize().
void QSGD3D12Engine::setWindowSize(const QSize &size, float dpr)
{
d->setWindowSize(size, dpr);
}
// Returns the window id reported by the private implementation.
WId QSGD3D12Engine::window() const
{
return d->currentWindow();
}
// Returns the window size reported by the private implementation.
QSize QSGD3D12Engine::windowSize() const
{
return d->currentWindowSize();
}
// Returns the device pixel ratio reported by the private implementation.
float QSGD3D12Engine::windowDevicePixelRatio() const
{
return d->currentWindowDpr();
}
// Returns the sample count reported by the private implementation.
uint QSGD3D12Engine::windowSamples() const
{
return d->currentWindowSamples();
}
// Forwards to QSGD3D12EnginePrivate::beginFrame().
void QSGD3D12Engine::beginFrame()
{
d->beginFrame();
}
// Forwards to QSGD3D12EnginePrivate::endFrame().
void QSGD3D12Engine::endFrame()
{
d->endFrame();
}
// Forwards to QSGD3D12EnginePrivate::beginLayer().
void QSGD3D12Engine::beginLayer()
{
d->beginLayer();
}
// Forwards to QSGD3D12EnginePrivate::endLayer().
void QSGD3D12Engine::endLayer()
{
d->endLayer();
}
// Forwards to QSGD3D12EnginePrivate::invalidateCachedFrameState().
void QSGD3D12Engine::invalidateCachedFrameState()
{
d->invalidateCachedFrameState();
}
// Forwards to QSGD3D12EnginePrivate::restoreFrameState(), passing the
// \a minimal flag through unchanged.
void QSGD3D12Engine::restoreFrameState(bool minimal)
{
d->restoreFrameState(minimal);
}
// Forwards the pipeline state to QSGD3D12EnginePrivate::finalizePipeline().
void QSGD3D12Engine::finalizePipeline(const QSGD3D12PipelineState &pipelineState)
{
d->finalizePipeline(pipelineState);
}
// Returns the buffer id produced by QSGD3D12EnginePrivate::genBuffer().
uint QSGD3D12Engine::genBuffer()
{
return d->genBuffer();
}
// Forwards to QSGD3D12EnginePrivate::releaseBuffer() for the buffer \a id.
void QSGD3D12Engine::releaseBuffer(uint id)
{
d->releaseBuffer(id);
}
// Forwards to QSGD3D12EnginePrivate::resetBuffer() with the buffer \a id,
// the \a data pointer and its \a size.
void QSGD3D12Engine::resetBuffer(uint id, const quint8 *data, int size)
{
d->resetBuffer(id, data, size);
}
// Forwards to QSGD3D12EnginePrivate::markBufferDirty() with the buffer
// \a id and the \a offset / \a size range.
void QSGD3D12Engine::markBufferDirty(uint id, int offset, int size)
{
d->markBufferDirty(id, offset, size);
}
// Forwards the viewport rectangle to QSGD3D12EnginePrivate::queueViewport().
void QSGD3D12Engine::queueViewport(const QRect &rect)
{
d->queueViewport(rect);
}
// Forwards the scissor rectangle to QSGD3D12EnginePrivate::queueScissor().
void QSGD3D12Engine::queueScissor(const QRect &rect)
{
d->queueScissor(rect);
}
// Forwards the render target \a id to
// QSGD3D12EnginePrivate::queueSetRenderTarget().
void QSGD3D12Engine::queueSetRenderTarget(uint id)
{
d->queueSetRenderTarget(id);
}
// Forwards the clear \a color to
// QSGD3D12EnginePrivate::queueClearRenderTarget().
void QSGD3D12Engine::queueClearRenderTarget(const QColor &color)
{
d->queueClearRenderTarget(color);
}
// Forwards the depth value, stencil value and the \a which flags to
// QSGD3D12EnginePrivate::queueClearDepthStencil().
void QSGD3D12Engine::queueClearDepthStencil(float depthValue, quint8 stencilValue, ClearFlags which)
{
d->queueClearDepthStencil(depthValue, stencilValue, which);
}
// Forwards the blend \a factor to
// QSGD3D12EnginePrivate::queueSetBlendFactor().
void QSGD3D12Engine::queueSetBlendFactor(const QVector4D &factor)
{
d->queueSetBlendFactor(factor);
}
// Forwards the stencil reference value to
// QSGD3D12EnginePrivate::queueSetStencilRef().
void QSGD3D12Engine::queueSetStencilRef(quint32 ref)
{
d->queueSetStencilRef(ref);
}
// Forwards the draw parameters to QSGD3D12EnginePrivate::queueDraw().
void QSGD3D12Engine::queueDraw(const DrawParams &params)
{
d->queueDraw(params);
}
// Forwards to QSGD3D12EnginePrivate::present().
void QSGD3D12Engine::present()
{
d->present();
}
// Forwards to QSGD3D12EnginePrivate::waitGPU().
void QSGD3D12Engine::waitGPU()
{
d->waitGPU();
}
// Returns the texture id produced by QSGD3D12EnginePrivate::genTexture().
uint QSGD3D12Engine::genTexture()
{
return d->genTexture();
}
// Forwards to QSGD3D12EnginePrivate::createTexture() with the texture
// \a id, \a size, image \a format and creation \a flags.
void QSGD3D12Engine::createTexture(uint id, const QSize &size, QImage::Format format, TextureCreateFlags flags)
{
d->createTexture(id, size, format, flags);
}
// Forwards to QSGD3D12EnginePrivate::queueTextureResize() with the texture
// \a id and the new \a size.
void QSGD3D12Engine::queueTextureResize(uint id, const QSize &size)
{
d->queueTextureResize(id, size);
}
// Single-image convenience overload: wraps \a image and \a dstPos in
// one-element vectors and forwards them to
// QSGD3D12EnginePrivate::queueTextureUpload().
void QSGD3D12Engine::queueTextureUpload(uint id, const QImage &image, const QPoint &dstPos, TextureUploadFlags flags)
{
d->queueTextureUpload(id, QVector<QImage>() << image, QVector<QPoint>() << dstPos, flags);
}
// Forwards the list of \a images and their destination positions to
// QSGD3D12EnginePrivate::queueTextureUpload().
void QSGD3D12Engine::queueTextureUpload(uint id, const QVector<QImage> &images, const QVector<QPoint> &dstPos,
TextureUploadFlags flags)
{
d->queueTextureUpload(id, images, dstPos, flags);
}
// Forwards to QSGD3D12EnginePrivate::releaseTexture() for the texture \a id.
void QSGD3D12Engine::releaseTexture(uint id)
{
d->releaseTexture(id);
}
// Forwards to QSGD3D12EnginePrivate::useTexture() for the texture \a id.
void QSGD3D12Engine::useTexture(uint id)
{
d->useTexture(id);
}
// Returns the render target id produced by
// QSGD3D12EnginePrivate::genRenderTarget().
uint QSGD3D12Engine::genRenderTarget()
{
return d->genRenderTarget();
}
// Forwards to QSGD3D12EnginePrivate::createRenderTarget() with the render
// target \a id, \a size, \a clearColor and sample count.
void QSGD3D12Engine::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples)
{
d->createRenderTarget(id, size, clearColor, samples);
}
// Forwards to QSGD3D12EnginePrivate::releaseRenderTarget() for the render
// target \a id.
void QSGD3D12Engine::releaseRenderTarget(uint id)
{
d->releaseRenderTarget(id);
}
// Forwards to QSGD3D12EnginePrivate::useRenderTargetAsTexture() for the
// render target \a id.
void QSGD3D12Engine::useRenderTargetAsTexture(uint id)
{
d->useRenderTargetAsTexture(id);
}
// Returns the id reported by QSGD3D12EnginePrivate::activeRenderTarget().
uint QSGD3D12Engine::activeRenderTarget() const
{
return d->activeRenderTarget();
}
// Returns the image produced by
// QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget() for the render
// target \a id.
QImage QSGD3D12Engine::executeAndWaitReadbackRenderTarget(uint id)
{
return d->executeAndWaitReadbackRenderTarget(id);
}
// Forwards to QSGD3D12EnginePrivate::simulateDeviceLoss().
void QSGD3D12Engine::simulateDeviceLoss()
{
d->simulateDeviceLoss();
}
// Returns the native resource reported by
// QSGD3D12EnginePrivate::getResource(). The QQuickWindow argument is unused.
void *QSGD3D12Engine::getResource(QQuickWindow *, QSGRendererInterface::Resource resource) const
{
return d->getResource(resource);
}
// Rounds \a size up to the next multiple of \a byteAlign. The bit trick is
// only meaningful when \a byteAlign is a power of two.
static inline quint32 alignedSize(quint32 size, quint32 byteAlign)
{
    const quint32 mask = byteAlign - 1;
    return (size + mask) & ~mask;
}
// Rounds \a size up to a multiple of
// D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT.
quint32 QSGD3D12Engine::alignedConstantBufferSize(quint32 size)
{
return alignedSize(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
}
// Maps a scenegraph geometry attribute type and tuple size to the engine's
// format enum.
//
// \a sgtype is the attribute's component type and \a tupleSize the number of
// components. When \a size is non-null it receives the attribute size in
// bytes for the supported types.
//
// Returns FmtUnknown (after printing a warning) for unsupported types and
// for tuple sizes that have no entry in the lookup tables.
QSGD3D12Format QSGD3D12Engine::toDXGIFormat(QSGGeometry::Type sgtype, int tupleSize, int *size)
{
    QSGD3D12Format format = FmtUnknown;

    // Both tables are indexed by tuple size (0..4).
    static const QSGD3D12Format formatMap_ub[] = { FmtUnknown,
                                                   FmtUNormByte,
                                                   FmtUNormByte2,
                                                   FmtUnknown,
                                                   FmtUNormByte4 };

    static const QSGD3D12Format formatMap_f[] = { FmtUnknown,
                                                  FmtFloat,
                                                  FmtFloat2,
                                                  FmtFloat3,
                                                  FmtFloat4 };

    switch (sgtype) {
    case QSGGeometry::UnsignedByteType:
        // Guard the table lookup: an out-of-range tuple size must not read
        // past the end of the array.
        if (tupleSize >= 0 && tupleSize < int(_countof(formatMap_ub)))
            format = formatMap_ub[tupleSize];
        else
            qWarning("unsupported tuple size %d for unsigned byte attribute", tupleSize);
        if (size)
            *size = tupleSize;
        break;
    case QSGGeometry::FloatType:
        if (tupleSize >= 0 && tupleSize < int(_countof(formatMap_f)))
            format = formatMap_f[tupleSize];
        else
            qWarning("unsupported tuple size %d for float attribute", tupleSize);
        if (size)
            *size = sizeof(float) * tupleSize;
        break;

    case QSGGeometry::UnsignedShortType:
        format = FmtUnsignedShort;
        if (size)
            *size = sizeof(ushort) * tupleSize;
        break;
    case QSGGeometry::UnsignedIntType:
        format = FmtUnsignedInt;
        if (size)
            *size = sizeof(uint) * tupleSize;
        break;

    case QSGGeometry::ByteType:
    case QSGGeometry::IntType:
    case QSGGeometry::ShortType:
        qWarning("no mapping for GL type 0x%x", sgtype);
        break;

    default:
        qWarning("unknown GL type 0x%x", sgtype);
        break;
    }

    return format;
}
// Returns the number of mipmap levels for a texture of the given size:
// ceil(log2(largest dimension)) + 1.
//
// The largest dimension is clamped to at least 1, so an empty or invalid
// size yields 1 instead of feeding 0 or a negative value to log2(), whose
// -inf/NaN result cannot be converted to int in a defined way.
int QSGD3D12Engine::mipMapLevels(const QSize &size)
{
    const int largestDimension = qMax(1, qMax(size.width(), size.height()));
    return ceil(log2(largestDimension)) + 1;
}
// Returns true when \a x is a positive power of two. Zero and negative
// values are reported as "not a power of two"; the test no longer negates
// \a x, so it is well-defined for every int value.
inline static bool isPowerOfTwo(int x)
{
    // A power of two has exactly one bit set, so clearing the lowest set bit
    // (x & (x - 1)) must leave nothing behind.
    return x > 0 && (x & (x - 1)) == 0;
}
// Returns the size a source image needs in order to be mipmap-capable: each
// dimension that is not a power of two already is replaced by
// qNextPowerOfTwo() of it. Empty sizes are returned unchanged.
QSize QSGD3D12Engine::mipMapAdjustedSourceSize(const QSize &size)
{
    if (size.isEmpty())
        return size;

    // ### for now only power-of-two sizes are mipmap-capable
    const int srcWidth = size.width();
    const int srcHeight = size.height();

    QSize result = size;
    if (!isPowerOfTwo(srcWidth))
        result.setWidth(qNextPowerOfTwo(srcWidth));
    if (!isPowerOfTwo(srcHeight))
        result.setHeight(qNextPowerOfTwo(srcHeight));
    return result;
}
// Releases everything created by initialize() and drops the reference on the
// shared device. Does nothing when the engine is not initialized. The window
// related members are kept so that the engine can be re-initialized, for
// example after a device loss.
void QSGD3D12EnginePrivate::releaseResources()
{
    if (!initialized)
        return;

    mipmapper.releaseResources();
    devLossTest.releaseResources();

    frameCommandList = nullptr;
    copyCommandList = nullptr;

    copyCommandAllocator = nullptr;
    for (int i = 0; i < frameInFlightCount; ++i) {
        frameCommandAllocator[i] = nullptr;
        pframeData[i].gpuCbvSrvUavHeap = nullptr;
        delete frameFence[i];
        // Do not leave a dangling pointer behind: the engine object stays
        // alive and may be initialized again later.
        frameFence[i] = nullptr;
    }

    defaultDS = nullptr;
    for (int i = 0; i < swapChainBufferCount; ++i) {
        backBufferRT[i] = nullptr;
        defaultRT[i] = nullptr;
    }

    psoCache.clear();
    rootSigCache.clear();
    buffers.clear();
    textures.clear();
    renderTargets.clear();

    cpuDescHeapManager.releaseResources();

    commandQueue = nullptr;
    copyCommandQueue = nullptr;

#ifndef Q_OS_WINRT
    dcompTarget = nullptr;
    dcompVisual = nullptr;
    dcompDevice = nullptr;
#endif

    swapChain = nullptr;

    delete presentFence;
    presentFence = nullptr; // same reasoning as for frameFence above
    textureUploadFence = nullptr;

    deviceManager()->unref();

    initialized = false;

    // 'window' must be kept, may just be a device loss
}
// Sets up the engine for rendering into the given window.
//
// \a w is the native window (an HWND, or an ISwapChainPanel* on WinRT),
// \a size and \a dpr give the window size and device pixel ratio,
// \a surfaceFormatSamples the requested sample count (values below 1 are
// treated as 1) and \a alpha requests a semi-transparent window, which is
// routed through DirectComposition where available.
//
// Buffer counts and frame latency can be tuned via QT_D3D_BUFFER_COUNT,
// QT_D3D_FRAME_COUNT and QT_D3D_WAITABLE_SWAP_CHAIN_MAX_LATENCY; QT_D3D_DEBUG
// enables the debug layer.
//
// Does nothing when the engine is initialized already. On failure a warning
// is printed and 'initialized' stays false. Note that releaseResources() is
// a no-op while 'initialized' is false, so objects created before a failing
// step are not released by it.
void QSGD3D12EnginePrivate::initialize(WId w, const QSize &size, float dpr, int surfaceFormatSamples, bool alpha)
{
    if (initialized)
        return;

    window = w;
    windowSize = size;
    windowDpr = dpr;
    windowSamples = qMax(1, surfaceFormatSamples); // may be -1 or 0, whereas windowSamples is uint and >= 1
    windowAlpha = alpha;

    swapChainBufferCount = qMin(qEnvironmentVariableIntValue("QT_D3D_BUFFER_COUNT"), MAX_SWAP_CHAIN_BUFFER_COUNT);
    if (swapChainBufferCount < 2)
        swapChainBufferCount = DEFAULT_SWAP_CHAIN_BUFFER_COUNT;

    frameInFlightCount = qMin(qEnvironmentVariableIntValue("QT_D3D_FRAME_COUNT"), MAX_FRAME_IN_FLIGHT_COUNT);
    if (frameInFlightCount < 1)
        frameInFlightCount = DEFAULT_FRAME_IN_FLIGHT_COUNT;

    static const char *latReqEnvVar = "QT_D3D_WAITABLE_SWAP_CHAIN_MAX_LATENCY";
    if (!qEnvironmentVariableIsSet(latReqEnvVar))
        waitableSwapChainMaxLatency = DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY;
    else
        waitableSwapChainMaxLatency = qBound(0, qEnvironmentVariableIntValue(latReqEnvVar), 16);

    if (qEnvironmentVariableIsSet("QSG_INFO"))
        const_cast<QLoggingCategory &>(QSG_LOG_INFO_GENERAL()).setEnabled(QtDebugMsg, true);

    qCDebug(QSG_LOG_INFO_GENERAL, "d3d12 engine init. swap chain buffer count %d, max frames prepared without blocking %d",
            swapChainBufferCount, frameInFlightCount);
    if (waitableSwapChainMaxLatency)
        qCDebug(QSG_LOG_INFO_GENERAL, "Swap chain frame latency waitable object enabled. Frame latency is %d", waitableSwapChainMaxLatency);

    const bool debugLayer = qEnvironmentVariableIntValue("QT_D3D_DEBUG") != 0;
    if (debugLayer) {
        qCDebug(QSG_LOG_INFO_GENERAL, "Enabling debug layer");
#if !defined(Q_OS_WINRT) || !defined(NDEBUG)
        ComPtr<ID3D12Debug> debugController;
        if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController))))
            debugController->EnableDebugLayer();
#else
        qCDebug(QSG_LOG_INFO_GENERAL, "Using DebugInterface will not allow certification to pass");
#endif
    }

    QSGD3D12DeviceManager *dev = deviceManager();
    device = dev->ref();
    if (!device) {
        // Device creation can fail (ref() then returns null). Give the
        // reference back and bail out instead of dereferencing a null device.
        qWarning("Failed to obtain a D3D12 device");
        dev->unref();
        return;
    }
    dev->registerDeviceLossObserver(this);

    if (debugLayer) {
        ComPtr<ID3D12InfoQueue> infoQueue;
        if (SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(&infoQueue)))) {
            infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true);
            infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true);
            const bool breakOnWarning = qEnvironmentVariableIntValue("QT_D3D_DEBUG_BREAK_ON_WARNING") != 0;
            infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, breakOnWarning);
            D3D12_INFO_QUEUE_FILTER filter = {};
            D3D12_MESSAGE_ID suppressedMessages[] = {
                // When using a render target other than the default one we
                // have no way to know the custom clear color, if there is one.
                D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE
            };
            filter.DenyList.NumIDs = _countof(suppressedMessages);
            filter.DenyList.pIDList = suppressedMessages;
            // setting the filter would enable Info messages which we don't need
            D3D12_MESSAGE_SEVERITY infoSev = D3D12_MESSAGE_SEVERITY_INFO;
            filter.DenyList.NumSeverities = 1;
            filter.DenyList.pSeverityList = &infoSev;
            infoQueue->PushStorageFilter(&filter);
        }
    }

    // One direct queue for rendering, one copy queue for uploads.
    D3D12_COMMAND_QUEUE_DESC queueDesc = {};
    queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
    if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue)))) {
        qWarning("Failed to create command queue");
        return;
    }

    queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
    if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&copyCommandQueue)))) {
        qWarning("Failed to create copy command queue");
        return;
    }

#ifndef Q_OS_WINRT
    HWND hwnd = reinterpret_cast<HWND>(w);

    if (windowAlpha) {
        // Go through DirectComposition for semi-transparent windows since the
        // traditional approaches won't fly with flip model swapchains.
        HRESULT hr = DCompositionCreateDevice(nullptr, IID_PPV_ARGS(&dcompDevice));
        if (SUCCEEDED(hr)) {
            hr = dcompDevice->CreateTargetForHwnd(hwnd, true, &dcompTarget);
            if (SUCCEEDED(hr)) {
                hr = dcompDevice->CreateVisual(&dcompVisual);
                if (FAILED(hr)) {
                    qWarning("Failed to create DirectComposition visual: %s",
                             qPrintable(comErrorMessage(hr)));
                    windowAlpha = false;
                }
            } else {
                qWarning("Failed to create DirectComposition target: %s",
                         qPrintable(comErrorMessage(hr)));
                windowAlpha = false;
            }
        } else {
            qWarning("Failed to create DirectComposition device: %s",
                     qPrintable(comErrorMessage(hr)));
            windowAlpha = false;
        }
    }

    if (windowAlpha) {
        DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
        swapChainDesc.Width = windowSize.width() * windowDpr;
        swapChainDesc.Height = windowSize.height() * windowDpr;
        swapChainDesc.Format = RT_COLOR_FORMAT;
        swapChainDesc.SampleDesc.Count = 1;
        swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
        swapChainDesc.BufferCount = swapChainBufferCount;
        swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
        swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
        swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
        if (waitableSwapChainMaxLatency)
            swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;

        ComPtr<IDXGISwapChain1> baseSwapChain;
        HRESULT hr = dev->dxgi()->CreateSwapChainForComposition(commandQueue.Get(), &swapChainDesc, nullptr, &baseSwapChain);
        if (SUCCEEDED(hr)) {
            if (SUCCEEDED(baseSwapChain.As(&swapChain))) {
                hr = dcompVisual->SetContent(swapChain.Get());
                if (SUCCEEDED(hr)) {
                    hr = dcompTarget->SetRoot(dcompVisual.Get());
                    if (FAILED(hr)) {
                        qWarning("SetRoot failed for DirectComposition target: %s",
                                 qPrintable(comErrorMessage(hr)));
                        windowAlpha = false;
                    }
                } else {
                    qWarning("SetContent failed for DirectComposition visual: %s",
                             qPrintable(comErrorMessage(hr)));
                    windowAlpha = false;
                }
            } else {
                qWarning("Failed to cast swap chain");
                windowAlpha = false;
            }
        } else {
            qWarning("Failed to create swap chain for composition: 0x%x", hr);
            windowAlpha = false;
        }
    }

    // Also the fallback path when any of the DirectComposition steps above failed.
    if (!windowAlpha) {
        DXGI_SWAP_CHAIN_DESC swapChainDesc = {};
        swapChainDesc.BufferCount = swapChainBufferCount;
        swapChainDesc.BufferDesc.Width = windowSize.width() * windowDpr;
        swapChainDesc.BufferDesc.Height = windowSize.height() * windowDpr;
        swapChainDesc.BufferDesc.Format = RT_COLOR_FORMAT;
        swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
        swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; // D3D12 requires the flip model
        swapChainDesc.OutputWindow = hwnd;
        swapChainDesc.SampleDesc.Count = 1; // Flip does not support MSAA so no choice here
        swapChainDesc.Windowed = TRUE;
        if (waitableSwapChainMaxLatency)
            swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;

        ComPtr<IDXGISwapChain> baseSwapChain;
        HRESULT hr = dev->dxgi()->CreateSwapChain(commandQueue.Get(), &swapChainDesc, &baseSwapChain);
        if (FAILED(hr)) {
            qWarning("Failed to create swap chain: %s", qPrintable(comErrorMessage(hr)));
            return;
        }
        hr = baseSwapChain.As(&swapChain);
        if (FAILED(hr)) {
            qWarning("Failed to cast swap chain: %s", qPrintable(comErrorMessage(hr)));
            return;
        }
    }

    dev->dxgi()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_ALT_ENTER);
#else
    DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
    swapChainDesc.Width = windowSize.width() * windowDpr;
    swapChainDesc.Height = windowSize.height() * windowDpr;
    swapChainDesc.Format = RT_COLOR_FORMAT;
    swapChainDesc.SampleDesc.Count = 1;
    swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    swapChainDesc.BufferCount = swapChainBufferCount;
    swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
    swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
    swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
    if (waitableSwapChainMaxLatency)
        swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;

    ComPtr<IDXGISwapChain1> baseSwapChain;
    HRESULT hr = dev->dxgi()->CreateSwapChainForComposition(commandQueue.Get(), &swapChainDesc, nullptr, &baseSwapChain);
    if (FAILED(hr)) {
        qWarning("Failed to create swap chain for composition: 0x%x", hr);
        return;
    }
    if (FAILED(baseSwapChain.As(&swapChain))) {
        qWarning("Failed to cast swap chain");
        return;
    }

    // The winrt platform plugin returns an ISwapChainPanel* from winId().
    ComPtr<ABI::Windows::UI::Xaml::Controls::ISwapChainPanel> swapChainPanel
            = reinterpret_cast<ABI::Windows::UI::Xaml::Controls::ISwapChainPanel *>(window);
    ComPtr<ISwapChainPanelNative> swapChainPanelNative;
    if (FAILED(swapChainPanel.As(&swapChainPanelNative))) {
        qWarning("Failed to cast swap chain panel to native");
        return;
    }
    hr = QEventDispatcherWinRT::runOnXamlThread([this, &swapChainPanelNative]() {
        return swapChainPanelNative->SetSwapChain(swapChain.Get());
    });
    if (FAILED(hr)) {
        qWarning("Failed to set swap chain on panel: 0x%x", hr);
        return;
    }
#endif

    if (waitableSwapChainMaxLatency) {
        if (FAILED(swapChain->SetMaximumFrameLatency(waitableSwapChainMaxLatency)))
            qWarning("Failed to set maximum frame latency to %d", waitableSwapChainMaxLatency);
        swapEvent = swapChain->GetFrameLatencyWaitableObject();
    }

    // One direct command allocator per frame in flight, plus one for copies.
    for (int i = 0; i < frameInFlightCount; ++i) {
        if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frameCommandAllocator[i])))) {
            qWarning("Failed to create command allocator");
            return;
        }
    }

    if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&copyCommandAllocator)))) {
        qWarning("Failed to create copy command allocator");
        return;
    }

    for (int i = 0; i < frameInFlightCount; ++i) {
        if (!createCbvSrvUavHeap(i, GPU_CBVSRVUAV_DESCRIPTORS))
            return;
    }

    cpuDescHeapManager.initialize(device);

    setupDefaultRenderTargets();

    if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, frameCommandAllocator[0].Get(),
                                         nullptr, IID_PPV_ARGS(&frameCommandList)))) {
        qWarning("Failed to create command list");
        return;
    }
    // created in recording state, close it for now
    frameCommandList->Close();

    if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, copyCommandAllocator.Get(),
                                         nullptr, IID_PPV_ARGS(&copyCommandList)))) {
        qWarning("Failed to create copy command list");
        return;
    }
    copyCommandList->Close();

    frameIndex = 0;

    presentFence = createCPUWaitableFence();
    for (int i = 0; i < frameInFlightCount; ++i)
        frameFence[i] = createCPUWaitableFence();

    if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&textureUploadFence)))) {
        qWarning("Failed to create fence");
        return;
    }

    psoCache.setMaxCost(MAX_CACHED_PSO);
    rootSigCache.setMaxCost(MAX_CACHED_ROOTSIG);

    if (!mipmapper.initialize(this))
        return;

    if (!devLossTest.initialize(this))
        return;

    currentRenderTarget = 0;

    initialized = true;
}
bool QSGD3D12EnginePrivate::createCbvSrvUavHeap(int pframeIndex, int descriptorCount)
{
D3D12_DESCRIPTOR_HEAP_DESC gpuDescHeapDesc = {};
gpuDescHeapDesc.NumDescriptors = descriptorCount;
gpuDescHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
gpuDescHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
if (FAILED(device->CreateDescriptorHeap(&gpuDescHeapDesc, IID_PPV_ARGS(&pframeData[pframeIndex].gpuCbvSrvUavHeap)))) {
qWarning("Failed to create shader-visible CBV-SRV-UAV heap");
return false;
}
pframeData[pframeIndex].gpuCbvSrvUavHeapSize = descriptorCount;
return true;
}
// Builds the DXGI sample description for \a format and the requested
// \a samples count. When multisampling is requested and the device reports
// quality levels for the combination, the highest quality level is chosen.
// In every other case (no multisampling requested, query failure, no quality
// levels) the result is a single sample with quality 0.
DXGI_SAMPLE_DESC QSGD3D12EnginePrivate::makeSampleDesc(DXGI_FORMAT format, uint samples)
{
    DXGI_SAMPLE_DESC result;
    result.Count = 1;
    result.Quality = 0;

    if (samples <= 1)
        return result;

    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS query = {};
    query.Format = format;
    query.SampleCount = samples;

    if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &query, sizeof(query)))) {
        qWarning("Failed to query multisample quality levels for sample count %d", samples);
        return result;
    }

    if (!(query.NumQualityLevels > 0)) {
        qWarning("No quality levels for multisampling with sample count %d", samples);
        return result;
    }

    result.Count = samples;
    result.Quality = query.NumQualityLevels - 1;
    return result;
}
// Creates a committed 2D color texture usable as a render target and a
// render target view for it at \a viewHandle.
//
// \a size is the texture size in pixels, \a clearColor the optimized clear
// value and \a samples the requested sample count. The resource starts out
// in the pixel shader resource state when \a samples is 0 or 1 and in the
// render target state otherwise.
//
// Returns the new resource (the caller takes over the reference) or nullptr
// when creation fails.
ID3D12Resource *QSGD3D12EnginePrivate::createColorBuffer(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size,
                                                         const QVector4D &clearColor, uint samples)
{
    D3D12_HEAP_PROPERTIES defaultHeap = {};
    defaultHeap.Type = D3D12_HEAP_TYPE_DEFAULT;

    D3D12_RESOURCE_DESC desc = {};
    desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    desc.Width = size.width();
    desc.Height = size.height();
    desc.DepthOrArraySize = 1;
    desc.MipLevels = 1;
    desc.Format = RT_COLOR_FORMAT;
    desc.SampleDesc = makeSampleDesc(desc.Format, samples);
    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;

    D3D12_CLEAR_VALUE optimizedClear = {};
    optimizedClear.Format = RT_COLOR_FORMAT;
    optimizedClear.Color[0] = clearColor.x();
    optimizedClear.Color[1] = clearColor.y();
    optimizedClear.Color[2] = clearColor.z();
    optimizedClear.Color[3] = clearColor.w();

    D3D12_RESOURCE_STATES startState = D3D12_RESOURCE_STATE_RENDER_TARGET;
    if (samples <= 1)
        startState = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

    ID3D12Resource *colorBuffer = nullptr;
    const HRESULT hr = device->CreateCommittedResource(&defaultHeap, D3D12_HEAP_FLAG_NONE, &desc,
                                                       startState, &optimizedClear, IID_PPV_ARGS(&colorBuffer));
    if (FAILED(hr)) {
        qWarning("Failed to create offscreen render target of size %dx%d", size.width(), size.height());
        return nullptr;
    }

    device->CreateRenderTargetView(colorBuffer, nullptr, viewHandle);
    return colorBuffer;
}
// Creates a committed D24S8 depth-stencil texture of the given \a size and
// sample count, initially in the depth-write state, and a depth-stencil view
// for it at \a viewHandle.
//
// Returns the new resource (the caller takes over the reference) or nullptr
// when creation fails.
ID3D12Resource *QSGD3D12EnginePrivate::createDepthStencil(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size, uint samples)
{
    const DXGI_FORMAT dsFormat = DXGI_FORMAT_D24_UNORM_S8_UINT;

    D3D12_HEAP_PROPERTIES defaultHeap = {};
    defaultHeap.Type = D3D12_HEAP_TYPE_DEFAULT;

    D3D12_RESOURCE_DESC desc = {};
    desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    desc.Width = size.width();
    desc.Height = size.height();
    desc.DepthOrArraySize = 1;
    desc.MipLevels = 1;
    desc.Format = dsFormat;
    desc.SampleDesc = makeSampleDesc(desc.Format, samples);
    desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;

    D3D12_CLEAR_VALUE optimizedClear = {};
    optimizedClear.Format = dsFormat;
    optimizedClear.DepthStencil.Depth = 1.0f;
    optimizedClear.DepthStencil.Stencil = 0;

    ID3D12Resource *depthStencil = nullptr;
    const HRESULT hr = device->CreateCommittedResource(&defaultHeap, D3D12_HEAP_FLAG_NONE, &desc,
                                                       D3D12_RESOURCE_STATE_DEPTH_WRITE, &optimizedClear,
                                                       IID_PPV_ARGS(&depthStencil));
    if (FAILED(hr)) {
        qWarning("Failed to create depth-stencil buffer of size %dx%d", size.width(), size.height());
        return nullptr;
    }

    // The view dimension follows the sample count that was actually granted.
    D3D12_DEPTH_STENCIL_VIEW_DESC viewDesc = {};
    viewDesc.Format = dsFormat;
    if (desc.SampleDesc.Count <= 1)
        viewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    else
        viewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS;

    device->CreateDepthStencilView(depthStencil, &viewDesc, viewHandle);
    return depthStencil;
}
void QSGD3D12EnginePrivate::setupDefaultRenderTargets()
{
for (int i = 0; i < swapChainBufferCount; ++i) {
if (FAILED(swapChain->GetBuffer(i, IID_PPV_ARGS(&backBufferRT[i])))) {
qWarning("Failed to get buffer %d from swap chain", i);
return;
}
defaultRTV[i] = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
if (windowSamples == 1) {
defaultRT[i] = backBufferRT[i];
device->CreateRenderTargetView(defaultRT[i].Get(), nullptr, defaultRTV[i]);
} else {
const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr);
// Not optimal if the user called setClearColor, but there's so
// much we can do. The debug layer warning is suppressed so we're good to go.
const QColor cc(Qt::white);
const QVector4D clearColor(cc.redF(), cc.greenF(), cc.blueF(), cc.alphaF());
ID3D12Resource *msaaRT = createColorBuffer(defaultRTV[i], size, clearColor, windowSamples);
if (msaaRT)
defaultRT[i].Attach(msaaRT);
}
}
defaultDSV = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr);
ID3D12Resource *ds = createDepthStencil(defaultDSV, size, windowSamples);
if (ds)
defaultDS.Attach(ds);
presentFrameIndex = 0;
}
void QSGD3D12EnginePrivate::setWindowSize(const QSize &size, float dpr)
{
if (!initialized || (windowSize == size && windowDpr == dpr))
return;
waitGPU();
windowSize = size;
windowDpr = dpr;
if (Q_UNLIKELY(debug_render()))
qDebug() << "resize" << size << dpr;
// Clear these, otherwise resizing will fail.
defaultDS = nullptr;
cpuDescHeapManager.release(defaultDSV, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
for (int i = 0; i < swapChainBufferCount; ++i) {
backBufferRT[i] = nullptr;
defaultRT[i] = nullptr;
cpuDescHeapManager.release(defaultRTV[i], D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
}
const int w = windowSize.width() * windowDpr;
const int h = windowSize.height() * windowDpr;
HRESULT hr = swapChain->ResizeBuffers(swapChainBufferCount, w, h, RT_COLOR_FORMAT,
waitableSwapChainMaxLatency ? DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT : 0);
if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
deviceManager()->deviceLossDetected();
return;
} else if (FAILED(hr)) {
qWarning("Failed to resize buffers: %s", qPrintable(comErrorMessage(hr)));
return;
}
setupDefaultRenderTargets();
}
// Reacts to a lost device: logs a warning and releases all resources.
void QSGD3D12EnginePrivate::deviceLost()
{
    qWarning("D3D device lost, will attempt to reinitialize");

    // Everything has to be released here, reinitialization may fail otherwise.
    // Afterwards the engine is in uninitialized state (with 'window' still
    // valid) and the resources get recreated on the next beginFrame().
    releaseResources();
}
// Allocates a new QSGD3D12CPUWaitableFence and creates its D3D12 fence and,
// when that succeeds, its auto-reset Win32 event. The object is returned even
// when fence creation fails (a warning is logged and no event is created).
QSGD3D12CPUWaitableFence *QSGD3D12EnginePrivate::createCPUWaitableFence() const
{
    QSGD3D12CPUWaitableFence *waitable = new QSGD3D12CPUWaitableFence;

    const HRESULT hr = device->CreateFence(waitable->value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&waitable->fence));
    if (SUCCEEDED(hr))
        waitable->event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
    else
        qWarning("Failed to create fence: %s", qPrintable(comErrorMessage(hr)));

    return waitable;
}
// Blocks the calling thread until the command queue has reached a newly
// signaled value of the given fence. Returns without waiting when the value is
// already completed or when the completion event cannot be set up.
void QSGD3D12EnginePrivate::waitForGPU(QSGD3D12CPUWaitableFence *f) const
{
    // Bump the fence's counter and have the queue signal the new value.
    const UINT64 target = f->value.fetchAndAddAcquire(1) + 1;
    commandQueue->Signal(f->fence.Get(), target);

    if (f->fence->GetCompletedValue() >= target)
        return;

    const HRESULT hr = f->fence->SetEventOnCompletion(target, f->event);
    if (FAILED(hr)) {
        qWarning("SetEventOnCompletion failed: %s", qPrintable(comErrorMessage(hr)));
        return;
    }

    WaitForSingleObject(f->event, INFINITE);
}
// Records a transition barrier on commandList that moves all subresources of
// the given resource from state 'before' to state 'after'.
void QSGD3D12EnginePrivate::transitionResource(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList,
                                               D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after) const
{
    D3D12_RESOURCE_BARRIER transition = {};
    transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    transition.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;

    D3D12_RESOURCE_TRANSITION_BARRIER &details(transition.Transition);
    details.pResource = resource;
    details.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    details.StateBefore = before;
    details.StateAfter = after;

    commandList->ResourceBarrier(1, &transition);
}
// Records a resolve of subresource 0 of the multisampled target 'msaa' into
// 'resolve'. The msaa target is taken from RENDER_TARGET to RESOLVE_SOURCE and
// back; 'resolve' is taken from resolveUsage to RESOLVE_DEST and back.
void QSGD3D12EnginePrivate::resolveMultisampledTarget(ID3D12Resource *msaa,
                                                      ID3D12Resource *resolve,
                                                      D3D12_RESOURCE_STATES resolveUsage,
                                                      ID3D12GraphicsCommandList *commandList) const
{
    // Builds a transition barrier covering all subresources of res.
    const auto makeTransition = [](ID3D12Resource *res, D3D12_RESOURCE_STATES from, D3D12_RESOURCE_STATES to) {
        D3D12_RESOURCE_BARRIER b = {};
        b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        b.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
        b.Transition.pResource = res;
        b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        b.Transition.StateBefore = from;
        b.Transition.StateAfter = to;
        return b;
    };

    // Put both resources into the states required by the resolve.
    const D3D12_RESOURCE_BARRIER enterResolve[2] = {
        makeTransition(msaa, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_SOURCE),
        makeTransition(resolve, resolveUsage, D3D12_RESOURCE_STATE_RESOLVE_DEST)
    };
    commandList->ResourceBarrier(2, enterResolve);

    commandList->ResolveSubresource(resolve, 0, msaa, 0, RT_COLOR_FORMAT);

    // And restore the states they were in before.
    const D3D12_RESOURCE_BARRIER leaveResolve[2] = {
        makeTransition(msaa, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET),
        makeTransition(resolve, D3D12_RESOURCE_STATE_RESOLVE_DEST, resolveUsage)
    };
    commandList->ResourceBarrier(2, leaveResolve);
}
// Records a UAV barrier for the given resource on commandList.
void QSGD3D12EnginePrivate::uavBarrier(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList) const
{
    D3D12_RESOURCE_BARRIER uav = {};
    uav.UAV.pResource = resource;
    uav.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;

    commandList->ResourceBarrier(1, &uav);
}
// Creates a committed buffer resource of 'size' bytes on the upload heap, in
// GENERIC_READ state. Returns the new resource, or nullptr (after logging a
// warning) when creation fails.
ID3D12Resource *QSGD3D12EnginePrivate::createBuffer(int size)
{
    // Start out as null so that a failed creation can never hand an
    // indeterminate pointer back to the caller.
    ID3D12Resource *buf = nullptr;

    D3D12_HEAP_PROPERTIES uploadHeapProp = {};
    uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD;

    D3D12_RESOURCE_DESC bufDesc = {};
    bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    bufDesc.Width = size;
    bufDesc.Height = 1;
    bufDesc.DepthOrArraySize = 1;
    bufDesc.MipLevels = 1;
    bufDesc.Format = DXGI_FORMAT_UNKNOWN;
    bufDesc.SampleDesc.Count = 1;
    bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;

    HRESULT hr = device->CreateCommittedResource(&uploadHeapProp, D3D12_HEAP_FLAG_NONE, &bufDesc,
                                                 D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buf));
    if (FAILED(hr)) {
        qWarning("Failed to create buffer resource: %s", qPrintable(comErrorMessage(hr)));
        return nullptr;
    }

    return buf;
}
// Makes sure the current in-flight frame's slot of buf has a buffer resource
// that is large enough for the CPU-side data. Only the resource is
// (re)created here, no data is copied.
void QSGD3D12EnginePrivate::ensureBuffer(Buffer *buf)
{
    Buffer::InFlightData &slot(buf->d[currentPFrameIndex]);

    // Buffers only ever grow, an oversized one is kept as it is.
    const bool mustAllocate = !slot.buffer || buf->cpuDataRef.size > slot.resourceSize;
    if (mustAllocate) {
        // Round the size up so that there is some headroom: a subsequent
        // rebuild of the contents with a slightly larger total size should
        // not lead to creating yet another buffer.
        const quint32 allocSize = alignedSize(buf->cpuDataRef.size, 4096);
        if (Q_UNLIKELY(debug_buffer()))
            qDebug("new buffer[pf=%d] of size %d (actual data size %d)", currentPFrameIndex, allocSize, buf->cpuDataRef.size);
        slot.buffer.Attach(createBuffer(allocSize));
        slot.resourceSize = allocSize;
    }

    // Remember the real data size for this in-flight slot too.
    slot.dataSize = buf->cpuDataRef.size;
}
// Copies the dirty ranges of buf's CPU-side data into the buffer resource of
// the current in-flight frame and then clears the CPU-side dirty list. When
// mapping fails a warning is logged and the dirty list is left untouched.
void QSGD3D12EnginePrivate::updateBuffer(Buffer *buf)
{
    if (buf->cpuDataRef.dirty.isEmpty())
        return;

    Buffer::InFlightData &slot(buf->d[currentPFrameIndex]);

    // Empty read range: the CPU only writes to the mapping.
    const D3D12_RANGE noRead = { 0, 0 };
    void *mapped = nullptr;
    if (FAILED(slot.buffer->Map(0, &noRead, &mapped))) {
        qWarning("Map failed for buffer of size %d", buf->cpuDataRef.size);
        return;
    }

    quint8 *dst = static_cast<quint8 *>(mapped);
    for (const auto &range : qAsConst(buf->cpuDataRef.dirty)) {
        // range.first is the offset, range.second the length in bytes
        if (Q_UNLIKELY(debug_buffer()))
            qDebug("%p o %d s %d", buf, range.first, range.second);
        memcpy(dst + range.first, buf->cpuDataRef.p + range.first, range.second);
    }

    slot.buffer->Unmap(0, nullptr);
    buf->cpuDataRef.dirty.clear();
}
// (Re)initializes the engine with the stored window parameters when it is
// currently uninitialized and a window is set. Otherwise does nothing.
void QSGD3D12EnginePrivate::ensureDevice()
{
    if (initialized || !window)
        return;

    initialize(window, windowSize, windowDpr, windowSamples, windowAlpha);
}
// Starts a new frame, or continues the one that was started early by
// beginLayer().
//
// For a fresh frame this
//  - makes sure the device is initialized,
//  - waits for a swap chain buffer (when the waitable object is in use) and
//    for the frame that previously occupied this in-flight slot,
//  - carries over buffer dirty ranges from the other in-flight frames,
//  - cleans up finished texture uploads and executes deferred deletes and
//    releases queued for this slot,
//  - opens the command list via beginDrawCalls().
//
// It is a fatal error to call this while a frame without layers is active.
void QSGD3D12EnginePrivate::beginFrame()
{
    if (inFrame && !activeLayers)
        qFatal("beginFrame called again without an endFrame, frame index was %d", frameIndex);

    if (Q_UNLIKELY(debug_render()))
        qDebug() << "***** begin frame, logical" << frameIndex << "present" << presentFrameIndex << "layer" << activeLayers;

    if (inFrame && activeLayers) {
        if (Q_UNLIKELY(debug_render()))
            qDebug("frame %d already in progress", frameIndex);
        if (!currentLayerDepth) {
            // There are layers and the real frame preparation starts now. Prepare for present.
            beginFrameDraw();
        }
        return;
    }

    inFrame = true;

    // The device may have been lost. This is the point to attempt to start
    // again from scratch. Except when it is not. Operations that can happen
    // out of frame (e.g. textures, render targets) may trigger reinit earlier
    // than beginFrame.
    ensureDevice();

    // Wait for a buffer to be available for Present, if the waitable event is in use.
    if (waitableSwapChainMaxLatency)
        WaitForSingleObject(swapEvent, INFINITE);

    // Block if needed. With 2 frames in flight frame N waits for frame N - 2, but not N - 1, to finish.
    currentPFrameIndex = frameIndex % frameInFlightCount;
    if (frameIndex >= frameInFlightCount) {
        ID3D12Fence *fence = frameFence[currentPFrameIndex]->fence.Get();
        HANDLE event = frameFence[currentPFrameIndex]->event;
        // Frame fence values start from 1, hence the +1.
        const quint64 inFlightFenceValue = frameIndex - frameInFlightCount + 1;
        if (fence->GetCompletedValue() < inFlightFenceValue) {
            // NOTE(review): the result of SetEventOnCompletion is not checked
            // here, unlike in waitForGPU().
            fence->SetEventOnCompletion(inFlightFenceValue, event);
            WaitForSingleObject(event, INFINITE);
        }
        // The frame that used this slot has finished, its allocator can be reused.
        frameCommandAllocator[currentPFrameIndex]->Reset();
    }

    PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
    pfd.cbvSrvUavNextFreeDescriptorIndex = 0;

    // Start with an empty per-frame dirty list for every buffer in use.
    for (Buffer &b : buffers) {
        if (b.entryInUse())
            b.d[currentPFrameIndex].dirty.clear();
    }

    if (frameIndex >= frameInFlightCount - 1) {
        // Now sync the buffer changes from the previous, potentially still in
        // flight, frames. This is done by taking the ranges dirtied in those
        // frames and adding them to the global CPU-side buffer's dirty list,
        // as if this frame changed those ranges. (however, dirty ranges
        // inherited this way are not added to this frame's persistent
        // per-frame dirty list because the next frame after this one should
        // inherit this frame's genuine changes only, the rest will come from
        // the earlier ones)
        for (int delta = frameInFlightCount - 1; delta >= 1; --delta) {
            const int prevPFrameIndex = (frameIndex - delta) % frameInFlightCount;
            PersistentFrameData &prevFrameData(pframeData[prevPFrameIndex]);
            for (uint id : qAsConst(prevFrameData.buffersUsedInFrame)) {
                Buffer &b(buffers[id - 1]);
                if (b.d[currentPFrameIndex].buffer && b.d[currentPFrameIndex].dataSize == b.cpuDataRef.size) {
                    // This slot's copy exists and has the right size: only the
                    // ranges changed in the earlier frame need to be redone.
                    if (Q_UNLIKELY(debug_buffer()))
                        qDebug() << "frame" << frameIndex << "takes dirty" << b.d[prevPFrameIndex].dirty
                                 << "from frame" << frameIndex - delta << "for buffer" << id;
                    for (const auto &range : qAsConst(b.d[prevPFrameIndex].dirty))
                        addDirtyRange(&b.cpuDataRef.dirty, range.first, range.second, b.cpuDataRef.size);
                } else {
                    // No buffer yet in this slot or the size changed: everything is dirty.
                    if (Q_UNLIKELY(debug_buffer()))
                        qDebug() << "frame" << frameIndex << "makes all dirty from frame" << frameIndex - delta
                                 << "for buffer" << id;
                    addDirtyRange(&b.cpuDataRef.dirty, 0, b.cpuDataRef.size, b.cpuDataRef.size);
                }
            }
        }
    }

    if (frameIndex >= frameInFlightCount) {
        // Do some texture upload bookkeeping.
        const quint64 finishedFrameIndex = frameIndex - frameInFlightCount; // we know since we just blocked for this
        // pfd conveniently refers to the same slot that was used by that frame
        if (!pfd.pendingTextureUploads.isEmpty()) {
            if (Q_UNLIKELY(debug_texture()))
                qDebug("Removing texture upload data for frame %llu", finishedFrameIndex);
            for (uint id : qAsConst(pfd.pendingTextureUploads)) {
                const int idx = id - 1;
                Texture &t(textures[idx]);
                // fenceValue is 0 when the previous frame cleared it, skip in
                // this case. Skip also when fenceValue > the value it was when
                // adding the last GPU wait - this is the case when more
                // uploads were queued for the same texture in the meantime.
                if (t.fenceValue && t.fenceValue == t.lastWaitFenceValue) {
                    t.fenceValue = 0;
                    t.lastWaitFenceValue = 0;
                    t.stagingBuffers.clear();
                    t.stagingHeaps.clear();
                    if (Q_UNLIKELY(debug_texture()))
                        qDebug("Cleaned staging data for texture %u", id);
                }
            }
            pfd.pendingTextureUploads.clear();
            if (!pfd.pendingTextureMipMap.isEmpty()) {
                if (Q_UNLIKELY(debug_texture()))
                    qDebug() << "cleaning mipmap generation data for " << pfd.pendingTextureMipMap;
                // no special cleanup is needed as mipmap generation uses the frame's resources
                pfd.pendingTextureMipMap.clear();
            }
            // The copy allocator is reset only when none of the other
            // in-flight slots has pending uploads either.
            bool hasPending = false;
            for (int delta = 1; delta < frameInFlightCount; ++delta) {
                const PersistentFrameData &prevFrameData(pframeData[(frameIndex - delta) % frameInFlightCount]);
                if (!prevFrameData.pendingTextureUploads.isEmpty()) {
                    hasPending = true;
                    break;
                }
            }
            if (!hasPending) {
                if (Q_UNLIKELY(debug_texture()))
                    qDebug("no more pending textures");
                copyCommandAllocator->Reset();
            }
        }

        // Do the deferred deletes.
        if (!pfd.deleteQueue.isEmpty()) {
            for (PersistentFrameData::DeleteQueueEntry &e : pfd.deleteQueue) {
                e.res = nullptr;
                e.descHeap = nullptr;
                if (e.cpuDescriptorPtr) {
                    D3D12_CPU_DESCRIPTOR_HANDLE h = { e.cpuDescriptorPtr };
                    cpuDescHeapManager.release(h, e.descHeapType);
                }
            }
            pfd.deleteQueue.clear();
        }
        // Deferred deletes issued outside a begin-endFrame go to the next
        // frame's out-of-frame delete queue as these cannot be executed in the
        // next beginFrame, only in next + frameInFlightCount. Move to the
        // normal queue if this is the next beginFrame.
        if (!pfd.outOfFrameDeleteQueue.isEmpty()) {
            pfd.deleteQueue = pfd.outOfFrameDeleteQueue;
            pfd.outOfFrameDeleteQueue.clear();
        }

        // Mark released texture, buffer, etc. slots free.
        if (!pfd.pendingReleases.isEmpty()) {
            for (const auto &pr : qAsConst(pfd.pendingReleases)) {
                Q_ASSERT(pr.id);
                if (pr.type == PersistentFrameData::PendingRelease::TypeTexture) {
                    Texture &t(textures[pr.id - 1]);
                    Q_ASSERT(t.entryInUse());
                    t.flags &= ~RenderTarget::EntryInUse; // createTexture() can now reuse this entry
                    t.texture = nullptr;
                } else if (pr.type == PersistentFrameData::PendingRelease::TypeBuffer) {
                    Buffer &b(buffers[pr.id - 1]);
                    Q_ASSERT(b.entryInUse());
                    b.flags &= ~Buffer::EntryInUse;
                    for (int i = 0; i < frameInFlightCount; ++i)
                        b.d[i].buffer = nullptr;
                } else {
                    qFatal("Corrupt pending release list, type %d", pr.type);
                }
            }
            pfd.pendingReleases.clear();
        }
        // Same scheme as for the out-of-frame deletes above.
        if (!pfd.outOfFramePendingReleases.isEmpty()) {
            pfd.pendingReleases = pfd.outOfFramePendingReleases;
            pfd.outOfFramePendingReleases.clear();
        }
    }

    pfd.buffersUsedInFrame.clear();

    beginDrawCalls();

    // Prepare for present if this is a frame without layers.
    if (!activeLayers)
        beginFrameDraw();
}
// Resets the frame command list onto the command allocator that belongs to
// the current frame, makes it the active command list and invalidates the
// cached per-frame state.
void QSGD3D12EnginePrivate::beginDrawCalls()
{
    auto *allocator = frameCommandAllocator[frameIndex % frameInFlightCount].Get();
    frameCommandList->Reset(allocator, nullptr);

    commandList = frameCommandList.Get();

    invalidateCachedFrameState();
}
// Resets the cached state in tframeData (last PSO and root signature,
// descriptor heap flag, drawing mode, counters) to its initial values.
void QSGD3D12EnginePrivate::invalidateCachedFrameState()
{
    // Pipeline objects and heaps have to be set again on the command list.
    tframeData.lastPso = nullptr;
    tframeData.lastRootSig = nullptr;
    tframeData.descHeapSet = false;

    // -1 is not a valid drawing mode so the next draw always sets the topology.
    tframeData.drawingMode = QSGGeometry::DrawingMode(-1);

    tframeData.currentIndexBuffer = 0;
    tframeData.activeTextureCount = 0;
    tframeData.drawCount = 0;
}
// Re-queues the state stored in tframeData. The render target and the
// pipeline state are always restored. Viewport, scissor, blend factor and
// stencil ref are skipped when 'minimal' is true.
void QSGD3D12EnginePrivate::restoreFrameState(bool minimal)
{
    queueSetRenderTarget(currentRenderTarget);

    if (minimal) {
        finalizePipeline(tframeData.pipelineState);
        return;
    }

    queueViewport(tframeData.viewport);
    queueScissor(tframeData.scissor);
    queueSetBlendFactor(tframeData.blendFactor);
    queueSetStencilRef(tframeData.stencilRef);
    finalizePipeline(tframeData.pipelineState);
}
void QSGD3D12EnginePrivate::beginFrameDraw()
{
if (windowSamples == 1)
transitionResource(defaultRT[presentFrameIndex % swapChainBufferCount].Get(), commandList,
D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET);
}
void QSGD3D12EnginePrivate::endFrame()
{
if (!inFrame)
qFatal("endFrame called without beginFrame, frame index %d", frameIndex);
if (Q_UNLIKELY(debug_render()))
qDebug("***** end frame");
endDrawCalls(true);
commandQueue->Signal(frameFence[frameIndex % frameInFlightCount]->fence.Get(), frameIndex + 1);
++frameIndex;
inFrame = false;
}
void QSGD3D12EnginePrivate::endDrawCalls(bool lastInFrame)
{
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
// Now is the time to sync all the changed areas in the buffers.
if (Q_UNLIKELY(debug_buffer()))
qDebug() << "buffers used in drawcall set" << pfd.buffersUsedInDrawCallSet;
for (uint id : qAsConst(pfd.buffersUsedInDrawCallSet))
updateBuffer(&buffers[id - 1]);
pfd.buffersUsedInFrame += pfd.buffersUsedInDrawCallSet;
pfd.buffersUsedInDrawCallSet.clear();
// Add a wait on the 3D queue for the relevant texture uploads on the copy queue.
if (!pfd.pendingTextureUploads.isEmpty()) {
quint64 topFenceValue = 0;
for (uint id : qAsConst(pfd.pendingTextureUploads)) {
const int idx = id - 1;
Texture &t(textures[idx]);
Q_ASSERT(t.fenceValue);
// skip if already added a Wait in the previous frame
if (t.lastWaitFenceValue == t.fenceValue)
continue;
t.lastWaitFenceValue = t.fenceValue;
if (t.fenceValue > topFenceValue)
topFenceValue = t.fenceValue;
if (t.mipmap())
pfd.pendingTextureMipMap.insert(id);
}
if (topFenceValue) {
if (Q_UNLIKELY(debug_texture()))
qDebug("added wait for texture fence %llu", topFenceValue);
commandQueue->Wait(textureUploadFence.Get(), topFenceValue);
// Generate mipmaps after the wait, when necessary.
if (!pfd.pendingTextureMipMap.isEmpty()) {
if (Q_UNLIKELY(debug_texture()))
qDebug() << "starting mipmap generation for" << pfd.pendingTextureMipMap;
for (uint id : qAsConst(pfd.pendingTextureMipMap))
mipmapper.queueGenerate(textures[id - 1]);
}
}
}
if (lastInFrame) {
// Resolve and transition the backbuffer for present, if needed.
const int idx = presentFrameIndex % swapChainBufferCount;
if (windowSamples == 1) {
transitionResource(defaultRT[idx].Get(), commandList,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT);
} else {
if (Q_UNLIKELY(debug_render())) {
const D3D12_RESOURCE_DESC desc = defaultRT[idx]->GetDesc();
qDebug("added resolve for multisampled render target (count %d, quality %d)",
desc.SampleDesc.Count, desc.SampleDesc.Quality);
}
resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(),
D3D12_RESOURCE_STATE_PRESENT, commandList);
}
if (activeLayers) {
if (Q_UNLIKELY(debug_render()))
qDebug("this frame had %d layers", activeLayers);
activeLayers = 0;
}
}
// Go!
HRESULT hr = frameCommandList->Close();
if (FAILED(hr)) {
qWarning("Failed to close command list: %s", qPrintable(comErrorMessage(hr)));
if (hr == E_INVALIDARG)
qWarning("Invalid arguments. Some of the commands in the list is invalid in some way.");
}
ID3D12CommandList *commandLists[] = { frameCommandList.Get() };
commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
commandList = nullptr;
}
// Starts rendering a layer: increments the active layer count and the layer
// nesting depth, and begins the frame early when this is the first layer. It
// is a fatal error to call this while a frame without layers is active.
void QSGD3D12EnginePrivate::beginLayer()
{
    const bool plainFrameActive = inFrame && !activeLayers;
    if (plainFrameActive)
        qFatal("Layer rendering cannot be started while a frame is active");

    if (Q_UNLIKELY(debug_render()))
        qDebug("===== beginLayer active %d depth %d (inFrame=%d)", activeLayers, currentLayerDepth, inFrame);

    ++currentLayerDepth;
    ++activeLayers;

    // The first layer triggers an early beginFrame. With multiple layers the sequence is
    // beginLayer - beginFrame - endLayer - beginLayer - beginFrame - endLayer - ... - (*) beginFrame - endFrame
    // where (*) is the start of the preparation of the actual, non-layer frame.
    if (activeLayers == 1)
        beginFrame();
}
// Ends the innermost layer by decrementing the layer nesting depth. It is a
// fatal error to call this outside a frame or without an open layer.
void QSGD3D12EnginePrivate::endLayer()
{
    const bool layerOpen = inFrame && activeLayers && currentLayerDepth;
    if (!layerOpen)
        qFatal("Mismatched endLayer");

    if (Q_UNLIKELY(debug_render()))
        qDebug("===== endLayer active %d depth %d", activeLayers, currentLayerDepth);

    // Only the depth changes here. activeLayers stays as it is, it remains
    // valid until endFrame.
    --currentLayerDepth;
}
// Root signature:
// [0] CBV - always present
// [1] table with one SRV per texture (must be a table since root descriptor SRVs cannot be textures) - optional
// one static sampler per texture - optional
//
// SRVs can be created freely via QSGD3D12CPUDescriptorHeapManager and stored
// in QSGD3D12TextureView. The engine will copy them onto a dedicated,
// shader-visible CBV-SRV-UAV heap in the correct order.
//
// Stores pipelineState as the current one and makes the matching root
// signature and pipeline state object active on the command list. Both are
// looked up in their caches first and are created and inserted when missing.
// Must be called between beginFrame() and endFrame(). When creating either
// object fails, a warning is logged and nothing is set on the command list.
void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipelineState)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    tframeData.pipelineState = pipelineState;

    RootSigCacheEntry *cachedRootSig = rootSigCache[pipelineState.shaders.rootSig];
    if (!cachedRootSig) {
        if (Q_UNLIKELY(debug_render()))
            qDebug("NEW ROOTSIG");

        cachedRootSig = new RootSigCacheEntry;

        D3D12_ROOT_PARAMETER rootParams[4];
        int rootParamCount = 0;

        rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
        rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
        rootParams[0].Descriptor.ShaderRegister = 0; // b0
        rootParams[0].Descriptor.RegisterSpace = 0;
        ++rootParamCount;

        // Declared outside the if since the root parameter stores a pointer
        // to it that has to stay valid until serialization.
        D3D12_DESCRIPTOR_RANGE tvDescRange;
        if (pipelineState.shaders.rootSig.textureViewCount > 0) {
            rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
            rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
            rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1;
            tvDescRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
            tvDescRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount;
            tvDescRange.BaseShaderRegister = 0; // t0, t1, ...
            tvDescRange.RegisterSpace = 0;
            tvDescRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
            rootParams[rootParamCount].DescriptorTable.pDescriptorRanges = &tvDescRange;
            ++rootParamCount;
        }

        Q_ASSERT(rootParamCount <= _countof(rootParams));
        D3D12_ROOT_SIGNATURE_DESC desc;
        desc.NumParameters = rootParamCount;
        desc.pParameters = rootParams;
        // Mixing up samplers and resource views in QSGD3D12TextureView means
        // that the number of static samplers has to match the number of
        // textures. This is not really ideal in general but works for Quick's use cases.
        // The shaders can still choose to declare and use fewer samplers, if they want to.
        desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount;
        D3D12_STATIC_SAMPLER_DESC staticSamplers[8];
        int sdIdx = 0;
        Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers));
        for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) {
            const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]);
            D3D12_STATIC_SAMPLER_DESC sd = {};
            sd.Filter = D3D12_FILTER(tv.filter);
            sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz);
            sd.AddressV = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeVert);
            sd.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
            sd.MinLOD = 0.0f;
            sd.MaxLOD = D3D12_FLOAT32_MAX;
            sd.ShaderRegister = sdIdx; // s0, s1, ...
            sd.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
            staticSamplers[sdIdx++] = sd;
        }
        desc.pStaticSamplers = staticSamplers;
        desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;

        ComPtr<ID3DBlob> signature;
        ComPtr<ID3DBlob> error;
        if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
            // The error blob is optional, do not rely on it being there.
            QByteArray msg;
            if (error)
                msg = QByteArray(static_cast<const char *>(error->GetBufferPointer()), int(error->GetBufferSize()));
            qWarning("Failed to serialize root signature: %s", qPrintable(msg));
            // Not in the cache yet, so it is still ours to free.
            delete cachedRootSig;
            return;
        }
        if (FAILED(device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
                                               IID_PPV_ARGS(&cachedRootSig->rootSig)))) {
            qWarning("Failed to create root signature");
            delete cachedRootSig;
            return;
        }

        rootSigCache.insert(pipelineState.shaders.rootSig, cachedRootSig);
    }

    PSOCacheEntry *cachedPso = psoCache[pipelineState];
    if (!cachedPso) {
        if (Q_UNLIKELY(debug_render()))
            qDebug("NEW PSO");

        cachedPso = new PSOCacheEntry;

        D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};

        // Input layout
        D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
        int ieIdx = 0;
        for (int i = 0; i < pipelineState.inputElementCount; ++i) {
            const QSGD3D12InputElement &ie(pipelineState.inputElements[i]);
            D3D12_INPUT_ELEMENT_DESC ieDesc = {};
            ieDesc.SemanticName = ie.semanticName;
            ieDesc.SemanticIndex = ie.semanticIndex;
            ieDesc.Format = DXGI_FORMAT(ie.format);
            ieDesc.InputSlot = ie.slot;
            ieDesc.AlignedByteOffset = ie.offset;
            ieDesc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
            if (Q_UNLIKELY(debug_render()))
                qDebug("input [%d]: %s %d 0x%x %d", ieIdx, ie.semanticName, ie.offset, ie.format, ie.slot);
            inputElements[ieIdx++] = ieDesc;
        }

        psoDesc.InputLayout = { inputElements, UINT(ieIdx) };

        psoDesc.pRootSignature = cachedRootSig->rootSig.Get();

        // Shaders
        D3D12_SHADER_BYTECODE vshader;
        vshader.pShaderBytecode = pipelineState.shaders.vs;
        vshader.BytecodeLength = pipelineState.shaders.vsSize;
        D3D12_SHADER_BYTECODE pshader;
        pshader.pShaderBytecode = pipelineState.shaders.ps;
        pshader.BytecodeLength = pipelineState.shaders.psSize;

        psoDesc.VS = vshader;
        psoDesc.PS = pshader;

        // Rasterizer
        D3D12_RASTERIZER_DESC rastDesc = {};
        rastDesc.FillMode = D3D12_FILL_MODE_SOLID;
        rastDesc.CullMode = D3D12_CULL_MODE(pipelineState.cullMode);
        rastDesc.FrontCounterClockwise = pipelineState.frontCCW;
        rastDesc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
        rastDesc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
        rastDesc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
        rastDesc.DepthClipEnable = TRUE;

        psoDesc.RasterizerState = rastDesc;

        // Blending. Any other blend value leaves render target 0 zero-initialized.
        D3D12_BLEND_DESC blendDesc = {};
        if (pipelineState.blend == QSGD3D12PipelineState::BlendNone) {
            D3D12_RENDER_TARGET_BLEND_DESC noBlendDesc = {};
            noBlendDesc.RenderTargetWriteMask = pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0;
            blendDesc.RenderTarget[0] = noBlendDesc;
        } else if (pipelineState.blend == QSGD3D12PipelineState::BlendPremul) {
            const D3D12_RENDER_TARGET_BLEND_DESC premulBlendDesc = {
                TRUE, FALSE,
                D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
                D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
                D3D12_LOGIC_OP_NOOP,
                UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0)
            };
            blendDesc.RenderTarget[0] = premulBlendDesc;
        } else if (pipelineState.blend == QSGD3D12PipelineState::BlendColor) {
            const D3D12_RENDER_TARGET_BLEND_DESC colorBlendDesc = {
                TRUE, FALSE,
                D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_COLOR, D3D12_BLEND_OP_ADD,
                D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
                D3D12_LOGIC_OP_NOOP,
                UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0)
            };
            blendDesc.RenderTarget[0] = colorBlendDesc;
        }
        psoDesc.BlendState = blendDesc;

        // Depth and stencil. Front and back faces share the stencil ops.
        psoDesc.DepthStencilState.DepthEnable = pipelineState.depthEnable;
        psoDesc.DepthStencilState.DepthWriteMask = pipelineState.depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
        psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC(pipelineState.depthFunc);

        psoDesc.DepthStencilState.StencilEnable = pipelineState.stencilEnable;
        psoDesc.DepthStencilState.StencilReadMask = psoDesc.DepthStencilState.StencilWriteMask = 0xFF;
        D3D12_DEPTH_STENCILOP_DESC stencilOpDesc = {
            D3D12_STENCIL_OP(pipelineState.stencilFailOp),
            D3D12_STENCIL_OP(pipelineState.stencilDepthFailOp),
            D3D12_STENCIL_OP(pipelineState.stencilPassOp),
            D3D12_COMPARISON_FUNC(pipelineState.stencilFunc)
        };
        psoDesc.DepthStencilState.FrontFace = psoDesc.DepthStencilState.BackFace = stencilOpDesc;

        psoDesc.SampleMask = UINT_MAX;
        psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE(pipelineState.topologyType);
        psoDesc.NumRenderTargets = 1;
        psoDesc.RTVFormats[0] = RT_COLOR_FORMAT;
        psoDesc.DSVFormat = DXGI_FORMAT_D24_UNORM_S8_UINT;
        // The sample description is always taken from the default render target.
        psoDesc.SampleDesc = defaultRT[0]->GetDesc().SampleDesc;

        HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&cachedPso->pso));
        if (FAILED(hr)) {
            qWarning("Failed to create graphics pipeline state: %s",
                     qPrintable(comErrorMessage(hr)));
            // Not in the cache yet, so it is still ours to free.
            delete cachedPso;
            return;
        }

        psoCache.insert(pipelineState, cachedPso);
    }

    // Only touch the command list when something actually changed.
    if (cachedPso->pso.Get() != tframeData.lastPso) {
        tframeData.lastPso = cachedPso->pso.Get();
        commandList->SetPipelineState(tframeData.lastPso);
    }

    if (cachedRootSig->rootSig.Get() != tframeData.lastRootSig) {
        tframeData.lastRootSig = cachedRootSig->rootSig.Get();
        commandList->SetGraphicsRootSignature(tframeData.lastRootSig);
    }

    if (pipelineState.shaders.rootSig.textureViewCount > 0)
        setDescriptorHeaps();
}
void QSGD3D12EnginePrivate::setDescriptorHeaps(bool force)
{
if (force || !tframeData.descHeapSet) {
tframeData.descHeapSet = true;
ID3D12DescriptorHeap *heaps[] = { pframeData[currentPFrameIndex].gpuCbvSrvUavHeap.Get() };
commandList->SetDescriptorHeaps(_countof(heaps), heaps);
}
}
void QSGD3D12EnginePrivate::queueViewport(const QRect &rect)
{
if (!inFrame) {
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
return;
}
tframeData.viewport = rect;
const D3D12_VIEWPORT viewport = { float(rect.x()), float(rect.y()), float(rect.width()), float(rect.height()), 0, 1 };
commandList->RSSetViewports(1, &viewport);
}
void QSGD3D12EnginePrivate::queueScissor(const QRect &rect)
{
if (!inFrame) {
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
return;
}
tframeData.scissor = rect;
const D3D12_RECT scissorRect = { rect.x(), rect.y(), rect.x() + rect.width(), rect.y() + rect.height() };
commandList->RSSetScissorRects(1, &scissorRect);
}
// Makes the render target with the given id the current one and binds its
// color and depth-stencil views on the command list. An id of 0 selects the
// default render target. Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::queueSetRenderTarget(uint id)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    D3D12_CPU_DESCRIPTOR_HANDLE colorView;
    D3D12_CPU_DESCRIPTOR_HANDLE depthStencilView;

    if (id) {
        // Ids are 1-based indices into renderTargets.
        const int idx = id - 1;
        Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse());
        RenderTarget &target(renderTargets[idx]);
        colorView = target.rtv;
        depthStencilView = target.dsv;

        const bool barrierAlreadyFlagged = (target.flags & RenderTarget::NeedsReadBarrier) != 0;
        if (!barrierAlreadyFlagged) {
            target.flags |= RenderTarget::NeedsReadBarrier;
            // Only non-multisampled targets are transitioned into render target state here.
            const bool multisampled = (target.flags & RenderTarget::Multisample) != 0;
            if (!multisampled)
                transitionResource(target.color.Get(), commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                                   D3D12_RESOURCE_STATE_RENDER_TARGET);
        }
    } else {
        colorView = defaultRTV[presentFrameIndex % swapChainBufferCount];
        depthStencilView = defaultDSV;
    }

    commandList->OMSetRenderTargets(1, &colorView, FALSE, &depthStencilView);

    currentRenderTarget = id;
}
// Records a clear of the current render target's color buffer to the given
// color. Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::queueClearRenderTarget(const QColor &color)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    // ClearRenderTargetView expects the components in R, G, B, A order.
    const float clearColor[] = { float(color.redF()), float(color.greenF()), float(color.blueF()), float(color.alphaF()) };

    // A current render target of 0 means the default render target.
    D3D12_CPU_DESCRIPTOR_HANDLE rtv = !currentRenderTarget
            ? defaultRTV[presentFrameIndex % swapChainBufferCount]
            : renderTargets[currentRenderTarget - 1].rtv;

    commandList->ClearRenderTargetView(rtv, clearColor, 0, nullptr);
}
void QSGD3D12EnginePrivate::queueClearDepthStencil(float depthValue, quint8 stencilValue, QSGD3D12Engine::ClearFlags which)
{
if (!inFrame) {
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
return;
}
D3D12_CPU_DESCRIPTOR_HANDLE dsv = !currentRenderTarget
? defaultDSV
: renderTargets[currentRenderTarget - 1].dsv;
commandList->ClearDepthStencilView(dsv, D3D12_CLEAR_FLAGS(int(which)), depthValue, stencilValue, 0, nullptr);
}
// Stores factor as the current blend factor and records it on the command
// list. Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::queueSetBlendFactor(const QVector4D &factor)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    tframeData.blendFactor = factor;

    float components[4];
    components[0] = factor.x();
    components[1] = factor.y();
    components[2] = factor.z();
    components[3] = factor.w();
    commandList->OMSetBlendFactor(components);
}
// Stores ref as the current stencil reference value and records it on the
// command list. Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::queueSetStencilRef(quint32 ref)
{
    if (inFrame) {
        tframeData.stencilRef = ref;
        commandList->OMSetStencilRef(ref);
    } else {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
    }
}
// Records one draw call described by params on the frame command list.
//
// The vertex, constant and (optional) index buffers are always registered as
// used by the current frame and ensured to exist, even when the draw itself is
// skipped because the current scissor rectangle is empty. When not skipped,
// the constant buffer view, primitive topology, vertex/index buffer views and
// the SRV descriptor table for the active textures are set before the draw
// command is added. Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params)
{
if (!inFrame) {
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
return;
}
// With an empty scissor no commands are recorded, but the bookkeeping below still runs.
const bool skip = tframeData.scissor.isEmpty();
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
// Buffer ids are 1-based; 0 means "no buffer".
pfd.buffersUsedInDrawCallSet.insert(params.vertexBuf);
const int vertexBufIdx = params.vertexBuf - 1;
Q_ASSERT(params.vertexBuf && vertexBufIdx < buffers.count() && buffers[vertexBufIdx].entryInUse());
pfd.buffersUsedInDrawCallSet.insert(params.constantBuf);
const int constantBufIdx = params.constantBuf - 1;
Q_ASSERT(params.constantBuf && constantBufIdx < buffers.count() && buffers[constantBufIdx].entryInUse());
// The index buffer is optional; -1 marks its absence.
int indexBufIdx = -1;
if (params.indexBuf) {
pfd.buffersUsedInDrawCallSet.insert(params.indexBuf);
indexBufIdx = params.indexBuf - 1;
Q_ASSERT(indexBufIdx < buffers.count() && buffers[indexBufIdx].entryInUse());
}
// Ensure buffers are created but do not copy the data here, leave that to endDrawCalls().
ensureBuffer(&buffers[vertexBufIdx]);
ensureBuffer(&buffers[constantBufIdx]);
if (indexBufIdx >= 0)
ensureBuffer(&buffers[indexBufIdx]);
// Set the CBV.
// A negative cboOffset leaves the currently bound constant buffer view untouched.
if (!skip && params.cboOffset >= 0) {
ID3D12Resource *cbuf = buffers[constantBufIdx].d[currentPFrameIndex].buffer.Get();
if (cbuf)
commandList->SetGraphicsRootConstantBufferView(0, cbuf->GetGPUVirtualAddress() + params.cboOffset);
}
// Set up vertex and index buffers.
ID3D12Resource *vbuf = buffers[vertexBufIdx].d[currentPFrameIndex].buffer.Get();
// An index buffer is only picked up when a non-negative start index is given.
ID3D12Resource *ibuf = indexBufIdx >= 0 && params.startIndexIndex >= 0
? buffers[indexBufIdx].d[currentPFrameIndex].buffer.Get() : nullptr;
// The topology is only set when it differs from the one cached in tframeData.
if (!skip && params.mode != tframeData.drawingMode) {
D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
switch (params.mode) {
case QSGGeometry::DrawPoints:
topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
break;
case QSGGeometry::DrawLines:
topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
break;
case QSGGeometry::DrawLineStrip:
topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
break;
case QSGGeometry::DrawTriangles:
topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
break;
case QSGGeometry::DrawTriangleStrip:
topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
break;
default:
qFatal("Unsupported drawing mode 0x%x", params.mode);
break;
}
commandList->IASetPrimitiveTopology(topology);
tframeData.drawingMode = params.mode;
}
if (!skip) {
// NOTE(review): vbuf is dereferenced without a null check; presumably
// ensureBuffer() above guarantees the resource exists - confirm.
D3D12_VERTEX_BUFFER_VIEW vbv;
vbv.BufferLocation = vbuf->GetGPUVirtualAddress() + params.vboOffset;
vbv.SizeInBytes = params.vboSize;
vbv.StrideInBytes = params.vboStride;
// must be set after the topology
commandList->IASetVertexBuffers(0, 1, &vbv);
}
// The index buffer view is only set when the buffer differs from the cached one.
if (!skip && params.startIndexIndex >= 0 && ibuf && tframeData.currentIndexBuffer != params.indexBuf) {
tframeData.currentIndexBuffer = params.indexBuf;
D3D12_INDEX_BUFFER_VIEW ibv;
ibv.BufferLocation = ibuf->GetGPUVirtualAddress();
ibv.SizeInBytes = buffers[indexBufIdx].cpuDataRef.size;
ibv.Format = DXGI_FORMAT(params.indexFormat);
commandList->IASetIndexBuffer(&ibv);
}
// Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap.
Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount);
if (tframeData.activeTextureCount > 0) {
if (!skip) {
ensureGPUDescriptorHeap(tframeData.activeTextureCount);
const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
for (int i = 0; i < tframeData.activeTextureCount; ++i) {
const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]);
Q_ASSERT(t.id);
const int idx = t.id - 1;
// The id refers either to the textures or to the renderTargets table, depending on the type.
const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture;
device->CopyDescriptorsSimple(1, dst, isTex ? textures[idx].srv : renderTargets[idx].srv,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
dst.ptr += stride;
}
// Root parameter 1 is pointed at the descriptors that were just copied.
D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart();
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
commandList->SetGraphicsRootDescriptorTable(1, gpuAddr);
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount;
}
// The active texture list is consumed by this draw call, whether skipped or not.
tframeData.activeTextureCount = 0;
}
// Add the draw call.
if (!skip) {
++tframeData.drawCount;
if (params.startIndexIndex >= 0)
commandList->DrawIndexedInstanced(params.count, 1, params.startIndexIndex, 0, 0);
else
commandList->DrawInstanced(params.count, 1, 0, 0);
}
// Once the per-list limit is reached, the list is submitted and a fresh one is started.
if (tframeData.drawCount == MAX_DRAW_CALLS_PER_LIST) {
if (Q_UNLIKELY(debug_render()))
qDebug("Limit of %d draw calls reached, executing command list", MAX_DRAW_CALLS_PER_LIST);
// submit the command list
endDrawCalls();
// start a new one
beginDrawCalls();
// prepare for the upcoming drawcalls
restoreFrameState();
}
}
void QSGD3D12EnginePrivate::ensureGPUDescriptorHeap(int cbvSrvUavDescriptorCount)
{
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
int newSize = pfd.gpuCbvSrvUavHeapSize;
while (pfd.cbvSrvUavNextFreeDescriptorIndex + cbvSrvUavDescriptorCount > newSize)
newSize *= 2;
if (newSize != pfd.gpuCbvSrvUavHeapSize) {
if (Q_UNLIKELY(debug_descheap()))
qDebug("Out of space for SRVs, creating new CBV-SRV-UAV descriptor heap with descriptor count %d", newSize);
deferredDelete(pfd.gpuCbvSrvUavHeap);
createCbvSrvUavHeap(currentPFrameIndex, newSize);
setDescriptorHeaps(true);
pfd.cbvSrvUavNextFreeDescriptorIndex = 0;
}
}
void QSGD3D12EnginePrivate::present()
{
if (!initialized)
return;
if (Q_UNLIKELY(debug_render()))
qDebug("--- present with vsync ---");
// This call will not block the CPU unless at least 3 buffers are queued,
// unless the waitable frame latency event is enabled. Then the latency of
// 3 is changed to whatever value desired, and blocking happens in
// beginFrame. If none of these hold, the fence-based wait in beginFrame
// throttles. Vsync (interval 1) is always enabled.
HRESULT hr = swapChain->Present(1, 0);
if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
deviceManager()->deviceLossDetected();
return;
} else if (FAILED(hr)) {
qWarning("Present failed: %s", qPrintable(comErrorMessage(hr)));
return;
}
#ifndef Q_OS_WINRT
if (dcompDevice)
dcompDevice->Commit();
#endif
++presentFrameIndex;
}
// Blocks on the present fence via waitForGPU(). Does nothing when the engine
// has not been initialized.
void QSGD3D12EnginePrivate::waitGPU()
{
    if (initialized) {
        if (Q_UNLIKELY(debug_render()))
            qDebug("--- blocking wait for GPU ---");

        waitForGPU(presentFence);
    }
}
// Hands out a 1-based id for an entry in \a tbl. The first entry that is not
// in use is recycled; when all entries are in use the table grows by one. The
// chosen entry's flags are reset to just the "entry in use" bit (0x01).
template<class T> uint newId(T *tbl)
{
    // Look for the first slot that is free to be reused.
    const int entryCount = tbl->count();
    int slot = 0;
    while (slot < entryCount && (*tbl)[slot].entryInUse())
        ++slot;

    // Everything is taken: append a new entry at the end.
    if (slot == entryCount)
        tbl->resize(tbl->size() + 1);

    (*tbl)[slot].flags = 0x01; // reset flags and set EntryInUse
    return uint(slot + 1);
}
// Sets (\a b true) or clears (\a b false) the bits of \a flag in e->flags,
// leaving all other bits as they were.
template<class T> void syncEntryFlags(T *e, int flag, bool b)
{
    e->flags = b ? (e->flags | flag) : (e->flags & ~flag);
}
// Reserves an entry in the buffers table and returns its 1-based id.
uint QSGD3D12EnginePrivate::genBuffer()
{
    const uint bufferId = newId(&buffers);
    return bufferId;
}
// Schedules the release of buffer \a id. The per-frame resources are queued
// for deferred deletion and the id is recorded as a pending release. No-op
// for id 0, before initialization, or when the entry is not in use.
void QSGD3D12EnginePrivate::releaseBuffer(uint id)
{
    if (!initialized || id == 0)
        return;

    const int bufferIndex = id - 1;
    Q_ASSERT(bufferIndex < buffers.count());

    if (Q_UNLIKELY(debug_buffer()))
        qDebug("releasing buffer %u", id);

    Buffer &buf(buffers[bufferIndex]);
    if (!buf.entryInUse())
        return;

    // The entry is neither nulled out nor marked reusable at this point. That
    // is done only once the frames potentially in flight have surely finished.
    for (int frame = 0; frame < frameInFlightCount; ++frame) {
        if (buf.d[frame].buffer)
            deferredDelete(buf.d[frame].buffer);
    }

    // In-frame releases go to the current frame's set, out-of-frame ones to
    // the out-of-frame set of the next frame slot.
    const PersistentFrameData::PendingRelease release(PersistentFrameData::PendingRelease::TypeBuffer, id);
    if (inFrame)
        pframeData[currentPFrameIndex].pendingReleases.insert(release);
    else
        pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases.insert(release);
}
// Points buffer \a id at new CPU-side data of \a size bytes. Previously
// recorded dirty ranges (CPU-side and for the current frame slot) are dropped
// and, for a non-empty buffer, replaced by one range covering everything.
// Only valid between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::resetBuffer(uint id, const quint8 *data, int size)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    Q_ASSERT(id);
    const int bufferIndex = id - 1;
    Q_ASSERT(bufferIndex < buffers.count() && buffers[bufferIndex].entryInUse());
    Buffer &buf(buffers[bufferIndex]);

    if (Q_UNLIKELY(debug_buffer()))
        qDebug("reset buffer %u, size %d", id, size);

    // Only the pointer is stored, the data itself is not copied here.
    buf.cpuDataRef.p = data;
    buf.cpuDataRef.size = size;

    auto &cpuDirty = buf.cpuDataRef.dirty;
    auto &frameDirty = buf.d[currentPFrameIndex].dirty;
    cpuDirty.clear();
    frameDirty.clear();

    if (size <= 0)
        return;

    const QPair<int, int> wholeBuffer(0, size);
    cpuDirty.append(wholeBuffer);
    frameDirty.append(wholeBuffer);
}
// Appends the (offset, size) pair to \a dirty unless the list's first entry
// already covers the whole buffer or an identical pair is already present.
void QSGD3D12EnginePrivate::addDirtyRange(DirtyList *dirty, int offset, int size, int bufferSize)
{
    // Nothing to add when the first entry already spans the entire buffer.
    const bool coversWholeBuffer = !dirty->isEmpty()
            && dirty->at(0).first == 0
            && dirty->at(0).second == bufferSize;
    if (coversWholeBuffer)
        return;

    const QPair<int, int> candidate(offset, size);
    if (dirty->contains(candidate))
        return;

    dirty->append(candidate);
}
// Records the range [offset, offset + size) of buffer \a id as dirty, both in
// the CPU-side list and in the list of the current frame slot. Only valid
// between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::markBufferDirty(uint id, int offset, int size)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    Q_ASSERT(id);
    const int bufferIndex = id - 1;
    Q_ASSERT(bufferIndex < buffers.count() && buffers[bufferIndex].entryInUse());

    Buffer &buf(buffers[bufferIndex]);
    const int totalSize = buf.cpuDataRef.size;
    addDirtyRange(&buf.cpuDataRef.dirty, offset, size, totalSize);
    addDirtyRange(&buf.d[currentPFrameIndex].dirty, offset, size, totalSize);
}
// Reserves an entry in the textures table, resets its upload fence value to 0
// and returns the entry's 1-based id.
uint QSGD3D12EnginePrivate::genTexture()
{
    const uint textureId = newId(&textures);
    Texture &entry(textures[textureId - 1]);
    entry.fenceValue = 0;
    return textureId;
}
// Picks the DXGI format used for a texture that is filled from images of the
// given QImage \a format. Optionally reports, via \a imageFormat, the QImage
// format the source has to be converted to before upload and, via
// \a bytesPerPixel, the size of one pixel in the upload data. Both output
// pointers may be null.
static inline DXGI_FORMAT textureFormat(QImage::Format format, bool wantsAlpha, bool mipmap, bool force32bit,
                                        QImage::Format *imageFormat, int *bytesPerPixel)
{
    DXGI_FORMAT dxgiFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
    QImage::Format targetImageFormat = format;
    int pixelSize = 4;

    // Generic RGBA target used whenever no more specific mapping applies.
    const QImage::Format rgbaFallback = wantsAlpha ? QImage::Format_RGBA8888_Premultiplied
                                                   : QImage::Format_RGBX8888;

    if (mipmap) {
        // Mipmap generation needs unordered access and BGRA is not an option for that. Stick to RGBA.
        targetImageFormat = rgbaFallback;
    } else if (format == QImage::Format_Grayscale8
               || format == QImage::Format_Indexed8
               || format == QImage::Format_Alpha8) {
        // 8-bit sources stay single-channel unless 32 bits are forced.
        if (force32bit) {
            targetImageFormat = QImage::Format_RGBA8888;
        } else {
            dxgiFormat = DXGI_FORMAT_R8_UNORM;
            pixelSize = 1;
        }
    } else if (format == QImage::Format_RGB32) {
        dxgiFormat = DXGI_FORMAT_B8G8R8A8_UNORM;
    } else if (format == QImage::Format_ARGB32) {
        dxgiFormat = DXGI_FORMAT_B8G8R8A8_UNORM;
        targetImageFormat = wantsAlpha ? QImage::Format_ARGB32_Premultiplied : QImage::Format_RGB32;
    } else if (format == QImage::Format_ARGB32_Premultiplied) {
        dxgiFormat = DXGI_FORMAT_B8G8R8A8_UNORM;
        targetImageFormat = wantsAlpha ? format : QImage::Format_RGB32;
    } else {
        targetImageFormat = rgbaFallback;
    }

    if (imageFormat)
        *imageFormat = targetImageFormat;
    if (bytesPerPixel)
        *bytesPerPixel = pixelSize;

    return dxgiFormat;
}
// Maps a DXGI texture format to the QImage format used to represent its
// contents. Formats without a mapping yield QImage::Format_Invalid.
static inline QImage::Format imageFormatForTexture(DXGI_FORMAT format)
{
    switch (format) {
    case DXGI_FORMAT_R8G8B8A8_UNORM:
        return QImage::Format_RGBA8888_Premultiplied;
    case DXGI_FORMAT_B8G8R8A8_UNORM:
        return QImage::Format_ARGB32_Premultiplied;
    case DXGI_FORMAT_R8_UNORM:
        return QImage::Format_Grayscale8;
    default:
        break;
    }
    return QImage::Format_Invalid;
}
// Creates the GPU resource and shader resource view for texture id, which
// must have been obtained from genTexture().
//
// size is the requested size; for mipmapped textures it is first passed
// through QSGD3D12Engine::mipMapAdjustedSourceSize(). format is the QImage
// format of the future source data and, together with createFlags, determines
// the DXGI format. For mipmapped textures one unordered access view is
// created for each mip level from 1 upwards. On resource creation failure a
// warning is printed and no views are created.
void QSGD3D12EnginePrivate::createTexture(uint id, const QSize &size, QImage::Format format,
QSGD3D12Engine::TextureCreateFlags createFlags)
{
ensureDevice();
Q_ASSERT(id);
// Ids are 1-based indices into the textures table.
const int idx = id - 1;
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
Texture &t(textures[idx]);
// Mirror the relevant creation flags into the entry's flags.
syncEntryFlags(&t, Texture::Alpha, createFlags & QSGD3D12Engine::TextureWithAlpha);
syncEntryFlags(&t, Texture::MipMap, createFlags & QSGD3D12Engine::TextureWithMipMaps);
const QSize adjustedSize = !t.mipmap() ? size : QSGD3D12Engine::mipMapAdjustedSourceSize(size);
D3D12_HEAP_PROPERTIES defaultHeapProp = {};
defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
D3D12_RESOURCE_DESC textureDesc = {};
textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
textureDesc.Width = adjustedSize.width();
textureDesc.Height = adjustedSize.height();
textureDesc.DepthOrArraySize = 1;
textureDesc.MipLevels = !t.mipmap() ? 1 : QSGD3D12Engine::mipMapLevels(adjustedSize);
// Only the DXGI format is needed here; the conversion format and pixel size outputs are not requested.
textureDesc.Format = textureFormat(format, t.alpha(), t.mipmap(),
createFlags.testFlag(QSGD3D12Engine::TextureAlways32Bit),
nullptr, nullptr);
textureDesc.SampleDesc.Count = 1;
textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
// Unordered access is requested only for mipmapped textures (the mip levels get UAVs below).
if (t.mipmap())
textureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
// The resource starts out in the COMMON state.
HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture));
if (FAILED(hr)) {
qWarning("Failed to create texture resource: %s", qPrintable(comErrorMessage(hr)));
return;
}
// The SRV lives in the CPU-side descriptor heap; per-draw copies are made from it elsewhere.
t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srvDesc.Format = textureDesc.Format;
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MipLevels = textureDesc.MipLevels;
device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv);
if (t.mipmap()) {
// Mipmap generation will need an UAV for each level that needs to be generated.
t.mipUAVs.clear();
// Level 0 is the uploaded base image, so UAVs start at level 1.
for (int level = 1; level < textureDesc.MipLevels; ++level) {
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
uavDesc.Format = textureDesc.Format;
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
uavDesc.Texture2D.MipSlice = level;
D3D12_CPU_DESCRIPTOR_HANDLE h = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
device->CreateUnorderedAccessView(t.texture.Get(), nullptr, &uavDesc, h);
t.mipUAVs.append(h);
}
}
if (Q_UNLIKELY(debug_texture()))
qDebug("created texture %u, size %dx%d, miplevels %d", id, adjustedSize.width(), adjustedSize.height(), textureDesc.MipLevels);
}
// Replaces the resource behind texture id with a new one of the given size
// and queues a copy of the old content into it on the copy queue.
//
// The new resource reuses the old resource's description except for width and
// height. The old resource and its SRV are queued for deferred deletion, a new
// SRV is allocated, and the texture's fence value is updated and signaled on
// the copy queue. Textures that were not created yet and mipmapped textures
// are rejected with a warning.
void QSGD3D12EnginePrivate::queueTextureResize(uint id, const QSize &size)
{
Q_ASSERT(id);
// Ids are 1-based indices into the textures table.
const int idx = id - 1;
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
Texture &t(textures[idx]);
if (!t.texture) {
qWarning("Cannot resize non-created texture %u", id);
return;
}
if (t.mipmap()) {
qWarning("Cannot resize mipmapped texture %u", id);
return;
}
if (Q_UNLIKELY(debug_texture()))
qDebug("resizing texture %u, size %dx%d", id, size.width(), size.height());
// Start from the existing description so format, flags etc. carry over.
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
textureDesc.Width = size.width();
textureDesc.Height = size.height();
D3D12_HEAP_PROPERTIES defaultHeapProp = {};
defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
// Hold a local reference to the old resource: t.texture is overwritten below
// but the old resource is still needed as the copy source.
ComPtr<ID3D12Resource> oldTexture = t.texture;
deferredDelete(t.texture);
HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture));
if (FAILED(hr)) {
qWarning("Failed to create resized texture resource: %s",
qPrintable(comErrorMessage(hr)));
return;
}
// The old SRV referenced the old resource; replace it with one for the new resource.
deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srvDesc.Format = textureDesc.Format;
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MipLevels = textureDesc.MipLevels;
device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv);
// Copy subresource 0 of the old texture to the origin of the new one.
D3D12_TEXTURE_COPY_LOCATION dstLoc;
dstLoc.pResource = t.texture.Get();
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
dstLoc.SubresourceIndex = 0;
D3D12_TEXTURE_COPY_LOCATION srcLoc;
srcLoc.pResource = oldTexture.Get();
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
srcLoc.SubresourceIndex = 0;
// NOTE(review): no source box is given; presumably callers only ever grow
// the texture so the old content fits into the new resource - confirm.
copyCommandList->Reset(copyCommandAllocator.Get(), nullptr);
copyCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
copyCommandList->Close();
ID3D12CommandList *commandLists[] = { copyCommandList.Get() };
copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
// Take the next upload fence value and have the copy queue signal it after the copy.
t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1;
copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue);
if (Q_UNLIKELY(debug_texture()))
qDebug("submitted old content copy for texture %u on the copy queue, fence %llu", id, t.fenceValue);
}
// Queues the upload of one or more images into texture id on the copy queue.
//
// images and dstPos must have the same number of elements; image i is copied
// to position dstPos[i] of subresource 0. Each image is converted to the
// format chosen by textureFormat() when necessary and, for mipmapped
// textures, scaled to the texture size. All staging buffers are placed in a
// single upload heap that is kept in the texture's stagingHeaps list. The
// texture's fence value is set to a new upload fence value, which the copy
// queue signals after the copies.
void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QVector<QImage> &images, const QVector<QPoint> &dstPos,
QSGD3D12Engine::TextureUploadFlags flags)
{
Q_ASSERT(id);
Q_ASSERT(images.count() == dstPos.count());
if (images.isEmpty())
return;
// Ids are 1-based indices into the textures table.
const int idx = id - 1;
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
Texture &t(textures[idx]);
Q_ASSERT(t.texture);
// When mipmapping is not in use, image can be smaller than the size passed
// to createTexture() and dstPos can specify a non-zero destination position.
if (t.mipmap() && (images.count() != 1 || dstPos.count() != 1 || !dstPos[0].isNull())) {
qWarning("Mipmapped textures (%u) do not support partial uploads", id);
return;
}
// Make life simpler by disallowing queuing a new mipmapped upload before the previous one finishes.
if (t.mipmap() && t.fenceValue) {
qWarning("Attempted to queue mipmapped texture upload (%u) while a previous upload is still in progress", id);
return;
}
// NOTE(review): the fence value is assigned here, but the failure paths below
// return before the Signal() at the end of the function - confirm that a
// texture left in this state cannot stall later waits.
t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1;
if (Q_UNLIKELY(debug_texture()))
qDebug("adding upload for texture %u on the copy queue, fence %llu", id, t.fenceValue);
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
const QSize adjustedTextureSize(textureDesc.Width, textureDesc.Height);
// First pass: compute the upload heap size, aligning each row and each staging buffer.
int totalSize = 0;
for (const QImage &image : images) {
int bytesPerPixel;
textureFormat(image.format(), t.alpha(), t.mipmap(),
flags.testFlag(QSGD3D12Engine::TextureUploadAlways32Bit),
nullptr, &bytesPerPixel);
// Mipmapped uploads are scaled to the texture size later, so size the buffer for that.
const int w = !t.mipmap() ? image.width() : adjustedTextureSize.width();
const int h = !t.mipmap() ? image.height() : adjustedTextureSize.height();
const int stride = alignedSize(w * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
totalSize += alignedSize(h * stride, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
}
if (Q_UNLIKELY(debug_texture()))
qDebug("%d sub-uploads, heap size %d bytes", images.count(), totalSize);
// Instead of individual committed resources for each upload buffer,
// allocate only once and use placed resources.
D3D12_HEAP_PROPERTIES uploadHeapProp = {};
uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD;
D3D12_HEAP_DESC uploadHeapDesc = {};
uploadHeapDesc.SizeInBytes = totalSize;
uploadHeapDesc.Properties = uploadHeapProp;
uploadHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
Texture::StagingHeap sheap;
if (FAILED(device->CreateHeap(&uploadHeapDesc, IID_PPV_ARGS(&sheap.heap)))) {
qWarning("Failed to create texture upload heap of size %d", totalSize);
return;
}
// The heap is stored with the texture.
t.stagingHeaps.append(sheap);
// NOTE(review): the early returns inside the loop below leave the copy
// command list reset but not closed - confirm this is recoverable.
copyCommandList->Reset(copyCommandAllocator.Get(), nullptr);
// Second pass: create a placed staging buffer per image, fill it and record the copy.
int placedOffset = 0;
for (int i = 0; i < images.count(); ++i) {
QImage::Format convFormat;
int bytesPerPixel;
textureFormat(images[i].format(), t.alpha(), t.mipmap(),
flags.testFlag(QSGD3D12Engine::TextureUploadAlways32Bit),
&convFormat, &bytesPerPixel);
if (Q_UNLIKELY(debug_texture() && i == 0))
qDebug("source image format %d, target format %d, bpp %d", images[i].format(), convFormat, bytesPerPixel);
// Convert only when the source is not already in the target format.
QImage convImage = images[i].format() == convFormat ? images[i] : images[i].convertToFormat(convFormat);
if (t.mipmap() && adjustedTextureSize != convImage.size())
convImage = convImage.scaled(adjustedTextureSize, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
const int stride = alignedSize(convImage.width() * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
D3D12_RESOURCE_DESC bufDesc = {};
bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
bufDesc.Width = stride * convImage.height();
bufDesc.Height = 1;
bufDesc.DepthOrArraySize = 1;
bufDesc.MipLevels = 1;
bufDesc.Format = DXGI_FORMAT_UNKNOWN;
bufDesc.SampleDesc.Count = 1;
bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
Texture::StagingBuffer sbuf;
if (FAILED(device->CreatePlacedResource(sheap.heap.Get(), placedOffset,
&bufDesc, D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr, IID_PPV_ARGS(&sbuf.buffer)))) {
qWarning("Failed to create texture upload buffer");
return;
}
quint8 *p = nullptr;
// An empty read range: the CPU only writes to the mapped memory.
const D3D12_RANGE readRange = { 0, 0 };
if (FAILED(sbuf.buffer->Map(0, &readRange, reinterpret_cast<void **>(&p)))) {
qWarning("Map failed (texture upload buffer)");
return;
}
// Copy row by row since the destination rows are padded to the aligned stride.
for (int y = 0, ye = convImage.height(); y < ye; ++y) {
memcpy(p, convImage.constScanLine(y), convImage.width() * bytesPerPixel);
p += stride;
}
sbuf.buffer->Unmap(0, nullptr);
D3D12_TEXTURE_COPY_LOCATION dstLoc;
dstLoc.pResource = t.texture.Get();
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
dstLoc.SubresourceIndex = 0;
D3D12_TEXTURE_COPY_LOCATION srcLoc;
srcLoc.pResource = sbuf.buffer.Get();
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
// The offset is relative to the staging buffer, which itself is placed at placedOffset in the heap.
srcLoc.PlacedFootprint.Offset = 0;
srcLoc.PlacedFootprint.Footprint.Format = textureDesc.Format;
srcLoc.PlacedFootprint.Footprint.Width = convImage.width();
srcLoc.PlacedFootprint.Footprint.Height = convImage.height();
srcLoc.PlacedFootprint.Footprint.Depth = 1;
srcLoc.PlacedFootprint.Footprint.RowPitch = stride;
copyCommandList->CopyTextureRegion(&dstLoc, dstPos[i].x(), dstPos[i].y(), 0, &srcLoc, nullptr);
// The staging buffer is stored with the texture.
t.stagingBuffers.append(sbuf);
placedOffset += alignedSize(bufDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
}
copyCommandList->Close();
ID3D12CommandList *commandLists[] = { copyCommandList.Get() };
copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
// Signal the fence value assigned above once the copies have executed.
copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue);
}
// Schedules the release of texture \a id. The resource, its SRV and any
// mipmap UAVs are queued for deferred deletion and the id is recorded as a
// pending release. No-op for id 0, before initialization, or when the entry
// is not in use.
void QSGD3D12EnginePrivate::releaseTexture(uint id)
{
    if (!id || !initialized)
        return;

    const int idx = id - 1;
    Q_ASSERT(idx < textures.count());

    // id is unsigned, so print it with %u like the other release functions do.
    if (Q_UNLIKELY(debug_texture()))
        qDebug("releasing texture %u", id);

    Texture &t(textures[idx]);
    if (!t.entryInUse())
        return;

    // Views only exist when the resource was successfully created.
    if (t.texture) {
        deferredDelete(t.texture);
        deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
        for (D3D12_CPU_DESCRIPTOR_HANDLE h : t.mipUAVs)
            deferredDelete(h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    }

    // In-frame releases go to the current frame's set, out-of-frame ones to
    // the out-of-frame set of the next frame slot.
    QSet<PersistentFrameData::PendingRelease> *pendingReleasesSet = inFrame
            ? &pframeData[currentPFrameIndex].pendingReleases
            : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases;

    pendingReleasesSet->insert(PersistentFrameData::PendingRelease(PersistentFrameData::PendingRelease::TypeTexture, id));
}
// Appends texture \a id to the list of textures active for the next draw
// call. When the texture has a non-zero upload fence value, it is also
// recorded in the current frame's set of pending texture uploads. Only valid
// between beginFrame() and endFrame().
void QSGD3D12EnginePrivate::useTexture(uint id)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    Q_ASSERT(id);
    const int textureIndex = id - 1;
    Q_ASSERT(textureIndex < textures.count() && textures[textureIndex].entryInUse());

    // The call order within a frame decides the texture register (0, 1, ...),
    // hence activeTextures is filled sequentially.
    TransientFrameData::ActiveTexture &slot = tframeData.activeTextures[tframeData.activeTextureCount++];
    slot = TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id);

    const bool hasUploadFence = textures[textureIndex].fenceValue;
    if (hasUploadFence)
        pframeData[currentPFrameIndex].pendingTextureUploads.insert(id);
}
// Creates the compute root signature and pipeline state used for mipmap
// generation and stores \a enginePriv for later use. Returns false, after
// printing a warning, when any of the D3D12 objects cannot be created.
//
// Root signature layout:
//   parameter 0: descriptor table with 1 SRV (t0)
//   parameter 1: descriptor table with 4 UAVs (u0..u3)
//   parameter 2: four 32-bit constants (b0)
//   plus one static linear clamp sampler.
bool QSGD3D12EnginePrivate::MipMapGen::initialize(QSGD3D12EnginePrivate *enginePriv)
{
    engine = enginePriv;

    D3D12_STATIC_SAMPLER_DESC sampler = {};
    sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
    sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    sampler.MinLOD = 0.0f;
    sampler.MaxLOD = D3D12_FLOAT32_MAX;

    D3D12_DESCRIPTOR_RANGE descRange[2];
    descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    descRange[0].NumDescriptors = 1;
    descRange[0].BaseShaderRegister = 0; // t0
    descRange[0].RegisterSpace = 0;
    descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
    descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    descRange[1].NumDescriptors = 4;
    descRange[1].BaseShaderRegister = 0; // u0..u3
    descRange[1].RegisterSpace = 0;
    descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;

    // Split into two to allow switching between the first and second set of UAVs later.
    D3D12_ROOT_PARAMETER rootParameters[3];
    rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    rootParameters[0].DescriptorTable.NumDescriptorRanges = 1;
    rootParameters[0].DescriptorTable.pDescriptorRanges = &descRange[0];

    rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    rootParameters[1].DescriptorTable.NumDescriptorRanges = 1;
    rootParameters[1].DescriptorTable.pDescriptorRanges = &descRange[1];

    rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    rootParameters[2].Constants.Num32BitValues = 4; // uint2 mip1Size, uint sampleLevel, uint totalMips
    rootParameters[2].Constants.ShaderRegister = 0; // b0
    rootParameters[2].Constants.RegisterSpace = 0;

    D3D12_ROOT_SIGNATURE_DESC desc = {};
    desc.NumParameters = 3;
    desc.pParameters = rootParameters;
    desc.NumStaticSamplers = 1;
    desc.pStaticSamplers = &sampler;

    ComPtr<ID3DBlob> signature;
    ComPtr<ID3DBlob> error;
    if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
        // The error blob is not guaranteed to be provided, so only read it when present.
        QByteArray msg("unknown error");
        if (error)
            msg = QByteArray(static_cast<const char *>(error->GetBufferPointer()), error->GetBufferSize());
        qWarning("Failed to serialize compute root signature: %s", qPrintable(msg));
        return false;
    }
    if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
                                                   IID_PPV_ARGS(&rootSig)))) {
        qWarning("Failed to create compute root signature");
        return false;
    }

    D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
    psoDesc.pRootSignature = rootSig.Get();
    psoDesc.CS.pShaderBytecode = g_CS_Generate4MipMaps;
    psoDesc.CS.BytecodeLength = sizeof(g_CS_Generate4MipMaps);

    if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)))) {
        qWarning("Failed to create compute pipeline state");
        return false;
    }

    return true;
}
// Drops the references to the compute pipeline state and root signature that
// were created in initialize().
void QSGD3D12EnginePrivate::MipMapGen::releaseResources()
{
pipelineState = nullptr;
rootSig = nullptr;
}
// The mipmap generator is used to insert commands on the main 3D queue. It is
// guaranteed that the queue has a wait for the base texture level upload
// before invoking queueGenerate(). There can be any number of invocations
// without waiting for earlier ones to finish. finished() is invoked when it is
// known for sure that frame containing the upload and mipmap generation has
// finished on the GPU.
void QSGD3D12EnginePrivate::MipMapGen::queueGenerate(const Texture &t)
{
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
engine->commandList->SetPipelineState(pipelineState.Get());
engine->commandList->SetComputeRootSignature(rootSig.Get());
// 1 SRV + (miplevels - 1) UAVs
const int descriptorCount = 1 + (textureDesc.MipLevels - 1);
engine->ensureGPUDescriptorHeap(descriptorCount);
// The descriptor heap is set on the command list either because the
// ensure() call above resized, or, typically, due to a texture-dependent
// draw call earlier.
engine->transitionResource(t.texture.Get(), engine->commandList,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
QSGD3D12EnginePrivate::PersistentFrameData &pfd(engine->pframeData[engine->currentPFrameIndex]);
const uint stride = engine->cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_CPU_DESCRIPTOR_HANDLE h = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
h.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
engine->device->CopyDescriptorsSimple(1, h, t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
h.ptr += stride;
for (int level = 1; level < textureDesc.MipLevels; ++level, h.ptr += stride)
engine->device->CopyDescriptorsSimple(1, h, t.mipUAVs[level - 1], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart();
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
engine->commandList->SetComputeRootDescriptorTable(0, gpuAddr);
gpuAddr.ptr += stride; // now points to the first UAV
for (int level = 1; level < textureDesc.MipLevels; level += 4, gpuAddr.ptr += stride * 4) {
engine->commandList->SetComputeRootDescriptorTable(1, gpuAddr);
QSize sz(textureDesc.Width, textureDesc.Height);
sz.setWidth(qMax(1, sz.width() >> level));
sz.setHeight(qMax(1, sz.height() >> level));
const quint32 constants[4] = { quint32(sz.width()), quint32(sz.height()),
quint32(level - 1),
quint32(textureDesc.MipLevels - 1) };
engine->commandList->SetComputeRoot32BitConstants(2, 4, constants, 0);
engine->commandList->Dispatch(sz.width(), sz.height(), 1);
engine->uavBarrier(t.texture.Get(), engine->commandList);
}
engine->transitionResource(t.texture.Get(), engine->commandList,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
pfd.cbvSrvUavNextFreeDescriptorIndex += descriptorCount;
}
// Queues a reference to resource \a res on a delete queue instead of
// releasing it right away. Inside a frame the current frame's queue is used,
// otherwise the out-of-frame queue of the next frame slot.
void QSGD3D12EnginePrivate::deferredDelete(ComPtr<ID3D12Resource> res)
{
    PersistentFrameData::DeleteQueueEntry entry;
    entry.res = res;

    if (inFrame)
        pframeData[currentPFrameIndex].deleteQueue.append(entry);
    else
        pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue.append(entry);
}
// Queues a reference to descriptor heap \a dh on a delete queue instead of
// releasing it right away. Inside a frame the current frame's queue is used,
// otherwise the out-of-frame queue of the next frame slot.
void QSGD3D12EnginePrivate::deferredDelete(ComPtr<ID3D12DescriptorHeap> dh)
{
    PersistentFrameData::DeleteQueueEntry entry;
    entry.descHeap = dh;

    if (inFrame)
        pframeData[currentPFrameIndex].deleteQueue.append(entry);
    else
        pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue.append(entry);
}
// Queues CPU descriptor \a h of heap type \a type on a delete queue. Inside a
// frame the current frame's queue is used, otherwise the out-of-frame queue
// of the next frame slot.
void QSGD3D12EnginePrivate::deferredDelete(D3D12_CPU_DESCRIPTOR_HANDLE h, D3D12_DESCRIPTOR_HEAP_TYPE type)
{
    PersistentFrameData::DeleteQueueEntry entry;
    entry.cpuDescriptorPtr = h.ptr;
    entry.descHeapType = type;

    if (inFrame)
        pframeData[currentPFrameIndex].deleteQueue.append(entry);
    else
        pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue.append(entry);
}
// Reserves an entry in the renderTargets table and returns its 1-based id.
uint QSGD3D12EnginePrivate::genRenderTarget()
{
    const uint renderTargetId = newId(&renderTargets);
    return renderTargetId;
}
// Creates the color and depth-stencil buffers and the views for render target
// \a id, which must have been obtained from genRenderTarget().
//
// When the color buffer ends up multisampled, an additional single-sample
// resolve texture is created and the SRV refers to that instead of the color
// buffer. When the color buffer cannot be created, a warning is printed and
// the render target is left without a color buffer.
void QSGD3D12EnginePrivate::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples)
{
    ensureDevice();

    Q_ASSERT(id);
    // Ids are 1-based indices into the renderTargets table.
    const int idx = id - 1;
    Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse());
    RenderTarget &rt(renderTargets[idx]);

    rt.rtv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    rt.dsv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
    rt.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

    // Attach() stores the returned pointer in the ComPtr as-is.
    ID3D12Resource *res = createColorBuffer(rt.rtv, size, clearColor, samples);
    if (res)
        rt.color.Attach(res);

    ID3D12Resource *dsres = createDepthStencil(rt.dsv, size, samples);
    if (dsres)
        rt.ds.Attach(dsres);

    // Without a color buffer there is nothing to query or to create views
    // for. Bail out instead of dereferencing a null resource below.
    if (!rt.color) {
        qWarning("Failed to create color buffer for render target %u", id);
        return;
    }

    // Query the sample count from the created resource, not from the request.
    const bool multisample = rt.color->GetDesc().SampleDesc.Count > 1;
    syncEntryFlags(&rt, RenderTarget::Multisample, multisample);

    if (!multisample) {
        device->CreateShaderResourceView(rt.color.Get(), nullptr, rt.srv);
    } else {
        // A multisampled target gets a single-sample texture of the same
        // size to resolve into; that is what the SRV exposes.
        D3D12_HEAP_PROPERTIES defaultHeapProp = {};
        defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;

        D3D12_RESOURCE_DESC textureDesc = {};
        textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
        textureDesc.Width = size.width();
        textureDesc.Height = size.height();
        textureDesc.DepthOrArraySize = 1;
        textureDesc.MipLevels = 1;
        textureDesc.Format = RT_COLOR_FORMAT;
        textureDesc.SampleDesc.Count = 1;
        textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;

        HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
                                                     D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&rt.colorResolve));
        if (FAILED(hr)) {
            qWarning("Failed to create resolve buffer: %s",
                     qPrintable(comErrorMessage(hr)));
            return;
        }

        device->CreateShaderResourceView(rt.colorResolve.Get(), nullptr, rt.srv);
    }

    if (Q_UNLIKELY(debug_render()))
        qDebug("created new render target %u, size %dx%d, samples %d", id, size.width(), size.height(), samples);
}
// Releases render target \a id. Its resources and descriptors are queued for
// deferred deletion and the table entry is marked as no longer in use. No-op
// for id 0, before initialization, or when the entry is not in use.
void QSGD3D12EnginePrivate::releaseRenderTarget(uint id)
{
    if (!initialized || id == 0)
        return;

    const int targetIndex = id - 1;
    Q_ASSERT(targetIndex < renderTargets.count());

    RenderTarget &target(renderTargets[targetIndex]);
    if (!target.entryInUse())
        return;

    if (Q_UNLIKELY(debug_render()))
        qDebug("releasing render target %u", id);

    // The resolve texture only exists for multisampled targets.
    if (target.colorResolve) {
        deferredDelete(target.colorResolve);
        target.colorResolve = nullptr;
    }

    // The RTV and SRV are released together with the color buffer.
    if (target.color) {
        deferredDelete(target.color);
        deferredDelete(target.rtv, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
        deferredDelete(target.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
        target.color = nullptr;
    }

    // The DSV is released together with the depth-stencil buffer.
    if (target.ds) {
        deferredDelete(target.ds);
        deferredDelete(target.dsv, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
        target.ds = nullptr;
    }

    target.flags &= ~RenderTarget::EntryInUse;
}
// Registers the render target \a id (1-based, must be in use) as an active
// texture for the frame being built. Must be called between beginFrame() and
// endFrame(); otherwise a warning is printed and nothing happens.
void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id)
{
    if (!inFrame) {
        qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
        return;
    }

    Q_ASSERT(id);
    const int slot = id - 1;
    Q_ASSERT(slot < renderTargets.count());
    RenderTarget &target = renderTargets[slot];
    Q_ASSERT(target.entryInUse() && target.color);

    // A pending read barrier is consumed exactly once: the flag is cleared and
    // the target is made readable from the pixel shader, either by resolving
    // the multisampled color buffer or by a plain state transition.
    if (target.flags & RenderTarget::NeedsReadBarrier) {
        target.flags &= ~RenderTarget::NeedsReadBarrier;
        if (target.flags & RenderTarget::Multisample) {
            resolveMultisampledTarget(target.color.Get(), target.colorResolve.Get(),
                                      D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, commandList);
        } else {
            transitionResource(target.color.Get(), commandList,
                               D3D12_RESOURCE_STATE_RENDER_TARGET,
                               D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
        }
    }

    // Append to the list of textures active in this frame.
    tframeData.activeTextures[tframeData.activeTextureCount] =
            TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id);
    ++tframeData.activeTextureCount;
}
// Copies the contents of a render target into a QImage and waits for the GPU
// to finish the copy before returning.
//
// \a id selects the source: 0 reads the swap chain back buffer for the current
// present index (resolving the multisampled default render target first when
// windowSamples > 1); any other value is a 1-based index into renderTargets.
//
// Returns a null QImage when called during frame preparation, when the
// readback buffer or the image cannot be allocated, when the texture format
// has no QImage equivalent, or when mapping the readback buffer fails.
QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id)
{
    // Readback due to QQuickWindow::grabWindow() happens outside
    // begin-endFrame, but QQuickItemGrabResult leads to rendering a layer
    // without a real frame afterwards and triggering readback. This has to be
    // supported as well.
    if (inFrame && (!activeLayers || currentLayerDepth)) {
        qWarning("%s: Cannot be called while frame preparation is active", __FUNCTION__);
        return QImage();
    }

    // Due to the above we insert a fake "real" frame when a layer was just rendered into.
    if (inFrame) {
        beginFrame();
        endFrame();
    }

    frameCommandList->Reset(frameCommandAllocator[frameIndex % frameInFlightCount].Get(), nullptr);
    ID3D12GraphicsCommandList *cl = frameCommandList.Get();

    // bstate is the state the source is transitioned from (and back to) around
    // the copy. It is only meaningful when needsBarrier is set; the multisample
    // paths pass COPY_SOURCE to the resolve helper and need no extra barrier here.
    D3D12_RESOURCE_STATES bstate = D3D12_RESOURCE_STATE_COMMON;
    bool needsBarrier = false;
    ID3D12Resource *rtRes = nullptr;

    if (id == 0) {
        const int idx = presentFrameIndex % swapChainBufferCount;
        if (windowSamples > 1) {
            resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(),
                                      D3D12_RESOURCE_STATE_COPY_SOURCE, cl);
        } else {
            bstate = D3D12_RESOURCE_STATE_PRESENT;
            needsBarrier = true;
        }
        rtRes = backBufferRT[idx].Get();
    } else {
        const int idx = id - 1;
        Q_ASSERT(idx < renderTargets.count());
        RenderTarget &rt(renderTargets[idx]);
        Q_ASSERT(rt.entryInUse() && rt.color);
        if (rt.flags & RenderTarget::Multisample) {
            resolveMultisampledTarget(rt.color.Get(), rt.colorResolve.Get(),
                                      D3D12_RESOURCE_STATE_COPY_SOURCE, cl);
            rtRes = rt.colorResolve.Get();
        } else {
            rtRes = rt.color.Get();
            bstate = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
            needsBarrier = true;
        }
    }

    // Query the row pitch and total size needed to hold subresource 0 in a buffer.
    ComPtr<ID3D12Resource> readbackBuf;
    D3D12_RESOURCE_DESC rtDesc = rtRes->GetDesc();
    UINT64 textureByteSize = 0;
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT textureLayout = {};
    device->GetCopyableFootprints(&rtDesc, 0, 1, 0, &textureLayout, nullptr, nullptr, &textureByteSize);

    D3D12_HEAP_PROPERTIES heapProp = {};
    heapProp.Type = D3D12_HEAP_TYPE_READBACK;

    D3D12_RESOURCE_DESC bufDesc = {};
    bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    bufDesc.Width = textureByteSize;
    bufDesc.Height = 1;
    bufDesc.DepthOrArraySize = 1;
    bufDesc.MipLevels = 1;
    bufDesc.Format = DXGI_FORMAT_UNKNOWN;
    bufDesc.SampleDesc.Count = 1;
    bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;

    if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &bufDesc,
                                               D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&readbackBuf)))) {
        qWarning("Failed to create committed resource (readback buffer)");
        // The command list was reset above and is still recording. Close it
        // (dropping whatever was recorded) so that the next Reset() on it is
        // valid; Reset() requires a closed command list.
        cl->Close();
        return QImage();
    }

    D3D12_TEXTURE_COPY_LOCATION dstLoc;
    dstLoc.pResource = readbackBuf.Get();
    dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    dstLoc.PlacedFootprint = textureLayout;

    D3D12_TEXTURE_COPY_LOCATION srcLoc;
    srcLoc.pResource = rtRes;
    srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    srcLoc.SubresourceIndex = 0;

    if (needsBarrier)
        transitionResource(rtRes, cl, bstate, D3D12_RESOURCE_STATE_COPY_SOURCE);
    cl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
    if (needsBarrier)
        transitionResource(rtRes, cl, D3D12_RESOURCE_STATE_COPY_SOURCE, bstate);

    cl->Close();
    ID3D12CommandList *commandLists[] = { cl };
    commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);

    // Wait for the GPU before touching the readback buffer on the CPU.
    QScopedPointer<QSGD3D12CPUWaitableFence> f(createCPUWaitableFence());
    waitForGPU(f.data());

    QImage::Format fmt = imageFormatForTexture(rtDesc.Format);
    if (fmt == QImage::Format_Invalid) {
        qWarning("Could not map render target format %d to a QImage format", rtDesc.Format);
        return QImage();
    }

    QImage img(static_cast<int>(rtDesc.Width), static_cast<int>(rtDesc.Height), fmt);
    if (img.isNull()) {
        // scanLine() on a null image would hand memcpy an invalid destination.
        qWarning("Failed to allocate a %ux%u image for render target readback",
                 static_cast<uint>(rtDesc.Width), static_cast<uint>(rtDesc.Height));
        return QImage();
    }

    // Declare the whole buffer as the range the CPU is going to read. An empty
    // range would tell D3D12 that nothing is read from the mapping.
    quint8 *p = nullptr;
    const D3D12_RANGE readRange = { 0, static_cast<SIZE_T>(textureByteSize) };
    if (FAILED(readbackBuf->Map(0, &readRange, reinterpret_cast<void **>(&p)))) {
        qWarning("Mapping the readback buffer failed");
        return QImage();
    }

    const int bpp = 4; // ### assumes a 32 bits per pixel format
    const size_t rowBytes = rtDesc.Width * bpp;
    // The back buffer (id 0) is copied top-down as-is; offscreen render
    // targets are copied bottom-up, i.e. the resulting image is flipped
    // vertically relative to the texture rows. Source rows are RowPitch apart.
    const bool flipVertically = id != 0;
    for (UINT row = 0; row < rtDesc.Height; ++row) {
        const int y = flipVertically ? static_cast<int>(rtDesc.Height - 1 - row) : static_cast<int>(row);
        memcpy(img.scanLine(y), p, rowBytes);
        p += textureLayout.Footprint.RowPitch;
    }

    // Nothing was written through the mapping, so report an empty written range.
    const D3D12_RANGE writtenRange = { 0, 0 };
    readbackBuf->Unmap(0, &writtenRange);

    return img;
}
void QSGD3D12EnginePrivate::simulateDeviceLoss()
{
qWarning("QSGD3D12Engine: Triggering device loss via TDR");
devLossTest.killDevice();
}
// Stores \a enginePriv for later use and, when built with DEVLOSS_TEST, creates
// the compute root signature and pipeline state (using the g_timeout shader
// bytecode) that killDevice() dispatches.
//
// Returns false if any of the D3D12 objects cannot be created; without
// DEVLOSS_TEST it always returns true.
bool QSGD3D12EnginePrivate::DeviceLossTester::initialize(QSGD3D12EnginePrivate *enginePriv)
{
    engine = enginePriv;

#ifdef DEVLOSS_TEST
    // One descriptor table with a CBV followed by a UAV, both at register 0.
    D3D12_DESCRIPTOR_RANGE descRange[2];
    descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    descRange[0].NumDescriptors = 1;
    descRange[0].BaseShaderRegister = 0;
    descRange[0].RegisterSpace = 0;
    descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
    descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    descRange[1].NumDescriptors = 1;
    descRange[1].BaseShaderRegister = 0;
    descRange[1].RegisterSpace = 0;
    descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;

    D3D12_ROOT_PARAMETER param = {};
    param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    param.DescriptorTable.NumDescriptorRanges = 2;
    param.DescriptorTable.pDescriptorRanges = descRange;

    D3D12_ROOT_SIGNATURE_DESC desc = {};
    desc.NumParameters = 1;
    desc.pParameters = &param;

    ComPtr<ID3DBlob> signature;
    ComPtr<ID3DBlob> error;
    if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
        // The error blob is not guaranteed to be set on failure, so only read
        // it when present instead of dereferencing a null pointer.
        QByteArray msg;
        if (error) {
            msg = QByteArray(static_cast<const char *>(error->GetBufferPointer()),
                             static_cast<int>(error->GetBufferSize()));
        }
        qWarning("Failed to serialize compute root signature: %s", qPrintable(msg));
        return false;
    }
    if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
                                                   IID_PPV_ARGS(&computeRootSignature)))) {
        qWarning("Failed to create compute root signature");
        return false;
    }

    D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
    psoDesc.pRootSignature = computeRootSignature.Get();
    psoDesc.CS.pShaderBytecode = g_timeout;
    psoDesc.CS.BytecodeLength = sizeof(g_timeout);
    if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&computeState)))) {
        qWarning("Failed to create compute pipeline state");
        return false;
    }
#endif

    return true;
}
// Drops the tester's references to the compute pipeline state and the compute
// root signature.
void QSGD3D12EnginePrivate::DeviceLossTester::releaseResources()
{
    computeState.Reset();
    computeRootSignature.Reset();
}
// With DEVLOSS_TEST defined, records a compute dispatch using the pipeline
// state and root signature of this tester on the engine's frame command list,
// submits it and waits for the GPU. Without DEVLOSS_TEST this does nothing.
void QSGD3D12EnginePrivate::DeviceLossTester::killDevice()
{
#ifdef DEVLOSS_TEST
    // Record on the engine's frame command list, backed by the allocator of
    // the current in-flight frame slot.
    ID3D12GraphicsCommandList *list = engine->frameCommandList.Get();
    ID3D12CommandAllocator *allocator =
            engine->frameCommandAllocator[engine->frameIndex % engine->frameInFlightCount].Get();

    list->Reset(allocator, computeState.Get());
    list->SetComputeRootSignature(computeRootSignature.Get());
    list->Dispatch(256, 1, 1);
    list->Close();

    // Submit the single list and wait for the GPU.
    ID3D12CommandList *submission[] = { list };
    engine->commandQueue->ExecuteCommandLists(_countof(submission), submission);
    engine->waitGPU();
#endif
}
// Maps a QSGRendererInterface resource identifier to the matching native
// object of this engine: the device, the command queue or the command list.
// Returns nullptr for any other \a resource value.
void *QSGD3D12EnginePrivate::getResource(QSGRendererInterface::Resource resource) const
{
    if (resource == QSGRendererInterface::DeviceResource)
        return device;

    if (resource == QSGRendererInterface::CommandQueueResource)
        return commandQueue.Get();

    if (resource == QSGRendererInterface::CommandListResource)
        return commandList;

    return nullptr;
}
QT_END_NAMESPACE