GS/Vulkan: Purge threaded presentation
stenzek committed May 27, 2024
1 parent d94f1dd · commit c94282c
Showing 8 changed files with 37 additions and 203 deletions.
7 changes: 0 additions & 7 deletions pcsx2-qt/Settings/GraphicsSettingsWidget.cpp
@@ -227,7 +227,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useDebugDevice, "EmuCore/GS", "UseDebugDevice", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.skipPresentingDuplicateFrames, "EmuCore/GS", "SkipDuplicateFrames", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableMailboxPresentation, "EmuCore/GS", "DisableMailboxPresentation", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.exclusiveFullscreenControl, "EmuCore/GS", "ExclusiveFullscreenControl", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(
@@ -335,7 +334,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
m_ui.useBlitSwapChain = nullptr;
m_ui.disableMailboxPresentation = nullptr;
m_ui.skipPresentingDuplicateFrames = nullptr;
m_ui.threadedPresentation = nullptr;
m_ui.overrideTextureBarriers = nullptr;
m_ui.disableFramebufferFetch = nullptr;
m_ui.disableShaderCache = nullptr;
@@ -767,11 +765,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
tr("Forces the use of FIFO over Mailbox presentation, i.e. double buffering instead of triple buffering. "
"Usually results in worse frame pacing."));

dialog->registerWidgetHelp(m_ui.threadedPresentation, tr("Disable Threaded Presentation"), tr("Unchecked"),
tr("Presents frames on the main GS thread instead of a worker thread. Used for debugging frametime issues. "
"Could reduce chance of missing a frame or reduce tearing at the expense of more erratic frame times. "
"Only applies to the Vulkan renderer."));

dialog->registerWidgetHelp(m_ui.useDebugDevice, tr("Enable Debug Device"), tr("Unchecked"),
tr("Enables API-level validation of graphics commands."));

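Editorial note: the "Disable Mailbox Presentation" help text above maps directly to Vulkan's present-mode choice. Below is a minimal sketch of how a renderer typically picks between the two modes; ChoosePresentMode and allow_mailbox are hypothetical names, not PCSX2's actual selection code.

```cpp
#include <vulkan/vulkan.h>
#include <vector>

VkPresentModeKHR ChoosePresentMode(VkPhysicalDevice gpu, VkSurfaceKHR surface, bool allow_mailbox)
{
	uint32_t count = 0;
	vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &count, nullptr);
	std::vector<VkPresentModeKHR> modes(count);
	vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &count, modes.data());

	// MAILBOX behaves like triple buffering: presents never block and stale
	// frames get replaced, which usually gives smoother frame pacing.
	if (allow_mailbox)
	{
		for (VkPresentModeKHR mode : modes)
		{
			if (mode == VK_PRESENT_MODE_MAILBOX_KHR)
				return mode;
		}
	}

	// FIFO is the only mode the spec guarantees; it is classic double-buffered vsync.
	return VK_PRESENT_MODE_FIFO_KHR;
}
```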
7 changes: 0 additions & 7 deletions pcsx2-qt/Settings/GraphicsSettingsWidget.ui
@@ -2052,13 +2052,6 @@
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="threadedPresentation">
<property name="text">
<string>Disable Threaded Presentation</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="disableMailboxPresentation">
<property name="text">
<string>Disable Mailbox Presentation</string>
1 change: 0 additions & 1 deletion pcsx2/Config.h
@@ -602,7 +602,6 @@ struct Pcsx2Config
DisableShaderCache : 1,
DisableFramebufferFetch : 1,
DisableVertexShaderExpand : 1,
DisableThreadedPresentation : 1,
SkipDuplicateFrames : 1,
OsdShowMessages : 1,
OsdShowSpeed : 1,
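Editorial note: DisableThreadedPresentation was one bit in a packed flag set inside Pcsx2Config, as the hunk above shows. Below is a simplified, self-contained sketch of that bitfield pattern; the field list is abbreviated and the struct name is hypothetical.

```cpp
#include <cstdint>
#include <cstdio>

struct GSFlags
{
	// Adjacent 1-bit fields share the same underlying 32-bit word, so dozens
	// of boolean options cost only a few bytes in total.
	uint32_t DisableShaderCache : 1,
		DisableFramebufferFetch : 1,
		DisableVertexShaderExpand : 1,
		SkipDuplicateFrames : 1;
};

int main()
{
	GSFlags flags{};
	flags.SkipDuplicateFrames = 1;
	// Typically prints 4: all four bits share one 32-bit allocation unit.
	std::printf("sizeof(GSFlags) = %zu\n", sizeof(GSFlags));
	return 0;
}
```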
180 changes: 34 additions & 146 deletions pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
@@ -1008,7 +1008,6 @@ void GSDeviceVK::WaitForFenceCounter(u64 fence_counter)

void GSDeviceVK::WaitForGPUIdle()
{
WaitForPresentComplete();
vkDeviceWaitIdle(m_device);
}

@@ -1050,19 +1049,12 @@ void GSDeviceVK::ScanForCommandBufferCompletion()

void GSDeviceVK::WaitForCommandBufferCompletion(u32 index)
{
// We might be waiting for the buffer we just submitted to the worker thread.
if (m_queued_present.command_buffer_index == index && !m_present_done.load(std::memory_order_acquire))
{
Console.WarningFmt("Waiting for threaded submission of cmdbuffer {}", index);
WaitForPresentComplete();
}

// Wait for this command buffer to be completed.
const VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}

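Editorial note: without the worker thread there is no queued submission to chase, so waiting on a command buffer collapses to the plain fence wait kept above. Below is a generic sketch of that fence pattern, assuming the fence was handed to vkQueueSubmit; WaitAndReset is a hypothetical helper, not PCSX2 code.

```cpp
#include <vulkan/vulkan.h>
#include <cstdint>

VkResult WaitAndReset(VkDevice device, VkFence fence)
{
	// Block until the GPU finishes the work submitted with this fence.
	const VkResult res = vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
	if (res != VK_SUCCESS)
		return res; // device loss and timeouts surface here

	// Un-signal the fence so the command buffer slot can be reused.
	return vkResetFences(device, 1, &fence);
}
```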
@@ -1085,8 +1077,7 @@ void GSDeviceVK::WaitForCommandBufferCompletion(u32 index)
m_completed_fence_counter = now_completed_counter;
}

void GSDeviceVK::SubmitCommandBuffer(
VKSwapChain* present_swap_chain /* = nullptr */, bool submit_on_thread /* = false */)
void GSDeviceVK::SubmitCommandBuffer(VKSwapChain* present_swap_chain)
{
FrameResources& resources = m_frame_resources[m_current_frame];

@@ -1154,33 +1145,9 @@ void GSDeviceVK::SubmitCommandBuffer(
if (spin_cycles != 0)
WaitForSpinCompletion(m_current_frame);

std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);

if (spin_enabled && m_optional_extensions.vk_ext_calibrated_timestamps)
resources.submit_timestamp = GetCPUTimestamp();

// Don't use threaded presentation when spinning is enabled. ScanForCommandBufferCompletion()
// calls vkGetFenceStatus(), which reads a fence that has been passed off to the thread.
if (!submit_on_thread || GSConfig.HWSpinGPUForReadbacks || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, present_swap_chain, spin_cycles);
if (present_swap_chain)
DoPresent(present_swap_chain);
return;
}

m_queued_present.command_buffer_index = m_current_frame;
m_queued_present.swap_chain = present_swap_chain;
m_queued_present.spin_cycles = spin_cycles;
m_present_done.store(false, std::memory_order_release);
m_present_queued_cv.notify_one();
}

void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chain, u32 spin_cycles)
{
FrameResources& resources = m_frame_resources[index];

uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSemaphore semas[2];
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
@@ -1197,7 +1164,7 @@ void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chain, u32 spin_cycles)
if (spin_cycles != 0)
{
semas[0] = present_swap_chain->GetRenderingFinishedSemaphore();
semas[1] = m_spin_resources[index].semaphore;
semas[1] = m_spin_resources[m_current_frame].semaphore;
submit_info.signalSemaphoreCount = 2;
submit_info.pSignalSemaphores = semas;
}
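Editorial note: when spinning is active, a single submit signals two semaphores, the swap chain's render-finished semaphore plus the spin-timer semaphore, which is what the semas[2] array above is for. Below is a hedged plain-Vulkan sketch of that shape; SubmitWithTwoSignals is a hypothetical helper.

```cpp
#include <vulkan/vulkan.h>

VkResult SubmitWithTwoSignals(VkQueue queue, VkCommandBuffer cmd,
	VkSemaphore render_finished, VkSemaphore spin_sema, VkFence fence)
{
	const VkSemaphore signals[2] = {render_finished, spin_sema};

	VkSubmitInfo info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
	info.commandBufferCount = 1;
	info.pCommandBuffers = &cmd;
	// Both semaphores are signalled when the same batch completes; one wakes
	// the presentation engine, the other releases the spin command.
	info.signalSemaphoreCount = 2;
	info.pSignalSemaphores = signals;

	return vkQueueSubmit(queue, 1, &info, fence);
}
```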
@@ -1210,105 +1177,44 @@ void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chain, u32 spin_cycles)
else if (spin_cycles != 0)
{
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &m_spin_resources[index].semaphore;
submit_info.pSignalSemaphores = &m_spin_resources[m_current_frame].semaphore;
}

const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}

if (spin_cycles != 0)
SubmitSpinCommand(index, spin_cycles);
}

void GSDeviceVK::DoPresent(VKSwapChain* present_swap_chain)
{
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1,
present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetCurrentImageIndexPtr(), nullptr};

present_swap_chain->ReleaseCurrentImage();

const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");

m_last_present_failed.store(true, std::memory_order_release);
return;
}

// Grab the next image as soon as possible, that way we spend less time blocked on the next
// submission. Don't care if it fails, we'll deal with that at the presentation call site.
// Credit to dxvk for the idea.
present_swap_chain->AcquireNextImage();
}

void GSDeviceVK::WaitForPresentComplete()
{
if (m_present_done.load(std::memory_order_acquire))
return;

std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
}

void GSDeviceVK::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
{
if (m_present_done.load(std::memory_order_acquire))
return;
SubmitSpinCommand(m_current_frame, spin_cycles);

m_present_done_cv.wait(lock, [this]() { return m_present_done.load(std::memory_order_acquire); });
}

void GSDeviceVK::PresentThread()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
while (!m_present_thread_done.load(std::memory_order_acquire))
if (present_swap_chain)
{
m_present_queued_cv.wait(lock, [this]() {
return !m_present_done.load(std::memory_order_acquire) ||
m_present_thread_done.load(std::memory_order_acquire);
});
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1,
present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetCurrentImageIndexPtr(), nullptr};

if (m_present_done.load(std::memory_order_acquire))
continue;

DoSubmitCommandBuffer(
m_queued_present.command_buffer_index, m_queued_present.swap_chain, m_queued_present.spin_cycles);
if (m_queued_present.swap_chain)
DoPresent(m_queued_present.swap_chain);
m_present_done.store(true, std::memory_order_release);
m_present_done_cv.notify_one();
}
}
present_swap_chain->ReleaseCurrentImage();

void GSDeviceVK::StartPresentThread()
{
pxAssert(!m_present_thread.joinable());
m_present_thread_done.store(false, std::memory_order_release);
m_present_thread = std::thread(&GSDeviceVK::PresentThread, this);
}
const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");

void GSDeviceVK::StopPresentThread()
{
if (!m_present_thread.joinable())
return;
m_last_present_failed = true;
return;
}

{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
m_present_thread_done.store(true, std::memory_order_release);
m_present_queued_cv.notify_one();
// Grab the next image as soon as possible, that way we spend less time blocked on the next
// submission. Don't care if it fails, we'll deal with that at the presentation call site.
// Credit to dxvk for the idea.
present_swap_chain->AcquireNextImage();
}

m_present_thread.join();
}

void GSDeviceVK::CommandBufferCompleted(u32 index)
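Editorial note: the deleted PresentThread/StartPresentThread/StopPresentThread trio was a single-slot producer/consumer worker guarded by one mutex and two condition variables. For readers tracing what was purged, here is a stripped-down sketch of that pattern; PresentWorker is hypothetical rather than the original class, and like the old code it assumes the caller waits for completion before queueing again.

```cpp
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>

class PresentWorker
{
public:
	void Start() { m_thread = std::thread(&PresentWorker::Run, this); }

	void Stop()
	{
		{
			std::unique_lock<std::mutex> lock(m_mutex);
			m_shutdown = true;
			m_queued_cv.notify_one();
		}
		m_thread.join();
	}

	// Caller must have called WaitForCompletion() since the last Queue().
	void Queue(std::function<void()> work)
	{
		std::unique_lock<std::mutex> lock(m_mutex);
		m_work = std::move(work);
		m_done = false;
		m_queued_cv.notify_one();
	}

	void WaitForCompletion()
	{
		std::unique_lock<std::mutex> lock(m_mutex);
		m_done_cv.wait(lock, [this]() { return m_done; });
	}

private:
	void Run()
	{
		std::unique_lock<std::mutex> lock(m_mutex);
		for (;;)
		{
			m_queued_cv.wait(lock, [this]() { return !m_done || m_shutdown; });
			if (m_shutdown)
				return;
			m_work(); // submit + present, run while holding the lock as the old code did
			m_done = true;
			m_done_cv.notify_one();
		}
	}

	std::thread m_thread;
	std::mutex m_mutex;
	std::condition_variable m_queued_cv;
	std::condition_variable m_done_cv;
	std::function<void()> m_work;
	bool m_done = true;
	bool m_shutdown = false;
};
```

Every handoff in this sketch is the cross-thread synchronization the commit eliminates; after this change, submit and present run inline on the GS thread.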
@@ -1411,12 +1317,11 @@ void GSDeviceVK::ActivateCommandBuffer(u32 index)

void GSDeviceVK::ExecuteCommandBuffer(WaitType wait_for_completion)
{
if (m_last_submit_failed.load(std::memory_order_acquire))
if (m_last_submit_failed)
return;

// If we're waiting for completion, don't bother waking the worker thread.
const u32 current_frame = m_current_frame;
SubmitCommandBuffer();
SubmitCommandBuffer(nullptr);
MoveToNextCommandBuffer();

if (wait_for_completion != WaitType::None)
@@ -1433,16 +1338,6 @@ void GSDeviceVK::ExecuteCommandBuffer(WaitType wait_for_completion)
}
}

bool GSDeviceVK::CheckLastPresentFail()
{
return m_last_present_failed.exchange(false, std::memory_order_acq_rel);
}

bool GSDeviceVK::CheckLastSubmitFail()
{
return m_last_submit_failed.load(std::memory_order_acquire);
}

void GSDeviceVK::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
{
FrameResources& resources = m_frame_resources[m_current_frame];
@@ -1809,7 +1704,7 @@ void GSDeviceVK::WaitForSpinCompletion(u32 index)
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}
SpinCommandCompleted(index);
@@ -2169,7 +2064,6 @@ void GSDeviceVK::Destroy()
WaitForGPUIdle();
}

StopPresentThread();
m_swap_chain.reset();

DestroySpinResources();
@@ -2334,6 +2228,10 @@ GSDevice::PresentResult GSDeviceVK::BeginPresent(bool frame_skip)
{
EndRenderPass();

// Check if the device was lost.
if (m_last_submit_failed)
return PresentResult::DeviceLost;

if (frame_skip)
return PresentResult::FrameSkipped;

@@ -2344,13 +2242,6 @@
return PresentResult::FrameSkipped;
}

// Previous frame needs to be presented before we can acquire the swap chain.
WaitForPresentComplete();

// Check if the device was lost.
if (CheckLastSubmitFail())
return PresentResult::DeviceLost;

VkResult res = m_swap_chain->AcquireNextImage();
if (res != VK_SUCCESS)
{
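Editorial note: with WaitForPresentComplete() gone, BeginPresent moves straight to acquiring the next swap chain image, and an acquire failure of VK_ERROR_OUT_OF_DATE_KHR means "recreate and retry" rather than device loss. Below is a plain-Vulkan sketch of that contract; the helper is hypothetical, not VKSwapChain's API.

```cpp
#include <vulkan/vulkan.h>
#include <cstdint>

VkResult AcquireImage(VkDevice device, VkSwapchainKHR swapchain,
	VkSemaphore image_available, uint32_t* image_index)
{
	const VkResult res = vkAcquireNextImageKHR(
		device, swapchain, UINT64_MAX, image_available, VK_NULL_HANDLE, image_index);

	if (res == VK_ERROR_OUT_OF_DATE_KHR)
	{
		// The surface changed (e.g. a window resize): the caller should
		// recreate the swap chain and retry, rather than treat this as fatal.
	}
	return res;
}
```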
@@ -2422,7 +2313,7 @@ void GSDeviceVK::EndPresent()
m_swap_chain->GetCurrentTexture()->TransitionToLayout(cmdbuffer, GSTextureVK::Layout::PresentSrc);
g_perfmon.Put(GSPerfMon::RenderPasses, 1);

SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
SubmitCommandBuffer(m_swap_chain.get());
MoveToNextCommandBuffer();

InvalidateCachedState();
@@ -2621,9 +2512,6 @@ bool GSDeviceVK::CreateDeviceAndSwapChain()

VKShaderCache::Create();

if (!GSConfig.DisableThreadedPresentation)
StartPresentThread();

if (surface != VK_NULL_HANDLE)
{
VkPresentModeKHR present_mode;
@@ -4554,7 +4442,7 @@ void GSDeviceVK::RenderBlankFrame()
cmdbuffer, sctex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &s_present_clear_color.color, 1, &srr);

m_swap_chain->GetCurrentTexture()->TransitionToLayout(cmdbuffer, GSTextureVK::Layout::PresentSrc);
SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
SubmitCommandBuffer(m_swap_chain.get());
ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
}

