summary refs log tree commit diff stats
path: root/src/intel/vulkan
diff options
context:
space:
mode:
author Chih-Wei Huang <cwhuang@linux.org.tw> 2016-11-25 12:05:07 +0800
committer Chih-Wei Huang <cwhuang@linux.org.tw> 2016-11-25 12:05:07 +0800
commit 524121d42bfdf8c1bd3565bd2adb0ffd7b52713f (patch)
tree 57b645909523126d571949a0cabb16087aca9849 /src/intel/vulkan
parent 5d0d07d402fa0edead26450fb86111292e8f834f (diff)
parent f7b58a378ca94cf1c2637d640ce5b9fb8f8519a6 (diff)
download external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.tar.gz
external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.tar.bz2
external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.zip
Merge remote-tracking branch 'mesa/13.0' into nougat-x86
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r-- src/intel/vulkan/anv_blorp.c 24
-rw-r--r-- src/intel/vulkan/anv_cmd_buffer.c 6
-rw-r--r-- src/intel/vulkan/anv_descriptor_set.c 12
-rw-r--r-- src/intel/vulkan/anv_device.c 192
-rw-r--r-- src/intel/vulkan/anv_formats.c 3
-rw-r--r-- src/intel/vulkan/anv_genX.h 2
-rw-r--r-- src/intel/vulkan/anv_image.c 12
-rw-r--r-- src/intel/vulkan/anv_intel.c 2
-rw-r--r-- src/intel/vulkan/anv_pass.c 3
-rw-r--r-- src/intel/vulkan/anv_pipeline.c 6
-rw-r--r-- src/intel/vulkan/anv_pipeline_cache.c 3
-rw-r--r-- src/intel/vulkan/anv_private.h 15
-rw-r--r-- src/intel/vulkan/anv_query.c 3
-rw-r--r-- src/intel/vulkan/anv_wsi.c 19
-rw-r--r-- src/intel/vulkan/gen8_cmd_buffer.c 29
-rw-r--r-- src/intel/vulkan/genX_blorp_exec.c 2
-rw-r--r-- src/intel/vulkan/genX_cmd_buffer.c 65
17 files changed, 345 insertions, 53 deletions
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 87f242cb25..d417469fe7 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -787,7 +787,7 @@ void anv_CmdClearColorImage(
unsigned base_layer = pRanges[r].baseArrayLayer;
unsigned layer_count = pRanges[r].layerCount;
- for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
+ for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
const unsigned level = pRanges[r].baseMipLevel + i;
const unsigned level_width = anv_minify(image->extent.width, level);
const unsigned level_height = anv_minify(image->extent.height, level);
@@ -847,7 +847,7 @@ void anv_CmdClearDepthStencilImage(
unsigned base_layer = pRanges[r].baseArrayLayer;
unsigned layer_count = pRanges[r].layerCount;
- for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
+ for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
const unsigned level = pRanges[r].baseMipLevel + i;
const unsigned level_width = anv_minify(image->extent.width, level);
const unsigned level_height = anv_minify(image->extent.height, level);
@@ -1141,15 +1141,6 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
- /* FINISHME(perf): Skip clears for resolve attachments.
- *
- * From the Vulkan 1.0 spec:
- *
- * If the first use of an attachment in a render pass is as a resolve
- * attachment, then the loadOp is effectively ignored as the resolve is
- * guaranteed to overwrite all pixels in the render area.
- */
-
if (!subpass->has_resolve)
return;
@@ -1163,6 +1154,17 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
if (dst_att == VK_ATTACHMENT_UNUSED)
continue;
+ if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
+ /* From the Vulkan 1.0 spec:
+ *
+ * If the first use of an attachment in a render pass is as a
+ * resolve attachment, then the loadOp is effectively ignored
+ * as the resolve is guaranteed to overwrite all pixels in the
+ * render area.
+ */
+ cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
+ }
+
struct anv_image_view *src_iview = fb->attachments[src_att];
struct anv_image_view *dst_iview = fb->attachments[dst_att];
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 7ff7dbabc7..44ae67d019 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -318,6 +318,9 @@ void anv_FreeCommandBuffers(
for (uint32_t i = 0; i < commandBufferCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+ if (!cmd_buffer)
+ continue;
+
anv_cmd_buffer_destroy(cmd_buffer);
}
}
@@ -796,6 +799,9 @@ void anv_DestroyCommandPool(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
+ if (!pool)
+ return;
+
list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
&pool->cmd_buffers, pool_link) {
anv_cmd_buffer_destroy(cmd_buffer);
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 7d5a78d018..17a1c8ead7 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -200,6 +200,9 @@ void anv_DestroyDescriptorSetLayout(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout);
+ if (!set_layout)
+ return;
+
vk_free2(&device->alloc, pAllocator, set_layout);
}
@@ -282,6 +285,9 @@ void anv_DestroyPipelineLayout(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);
+ if (!pipeline_layout)
+ return;
+
vk_free2(&device->alloc, pAllocator, pipeline_layout);
}
@@ -355,6 +361,9 @@ void anv_DestroyDescriptorPool(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool);
+ if (!pool)
+ return;
+
anv_state_stream_finish(&pool->surface_state_stream);
vk_free2(&device->alloc, pAllocator, pool);
}
@@ -546,6 +555,9 @@ VkResult anv_FreeDescriptorSets(
for (uint32_t i = 0; i < count; i++) {
ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
+ if (!set)
+ continue;
+
anv_descriptor_set_destroy(device, pool, set);
}
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index e83887c36e..125df22d85 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -896,6 +896,12 @@ VkResult anv_CreateDevice(
pthread_mutex_init(&device->mutex, NULL);
+ pthread_condattr_t condattr;
+ pthread_condattr_init(&condattr);
+ pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+ pthread_cond_init(&device->queue_submit, &condattr); /* NULL here would discard the monotonic clock set above */
+ pthread_condattr_destroy(&condattr);
+
anv_bo_pool_init(&device->batch_bo_pool, device);
anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
@@ -1001,10 +1007,11 @@ VkResult anv_EnumerateInstanceExtensionProperties(
return VK_SUCCESS;
}
- assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+ typed_memcpy(pProperties, global_extensions, *pPropertyCount);
- *pPropertyCount = ARRAY_SIZE(global_extensions);
- memcpy(pProperties, global_extensions, sizeof(global_extensions));
+ if (*pPropertyCount < ARRAY_SIZE(global_extensions))
+ return VK_INCOMPLETE;
return VK_SUCCESS;
}
@@ -1020,10 +1027,11 @@ VkResult anv_EnumerateDeviceExtensionProperties(
return VK_SUCCESS;
}
- assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+ typed_memcpy(pProperties, device_extensions, *pPropertyCount);
- *pPropertyCount = ARRAY_SIZE(device_extensions);
- memcpy(pProperties, device_extensions, sizeof(device_extensions));
+ if (*pPropertyCount < ARRAY_SIZE(device_extensions))
+ return VK_INCOMPLETE;
return VK_SUCCESS;
}
@@ -1141,6 +1149,11 @@ VkResult anv_QueueSubmit(
result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
if (result != VK_SUCCESS)
goto out;
+
+ /* Update the fence and wake up any waiters */
+ assert(fence->state == ANV_FENCE_STATE_RESET);
+ fence->state = ANV_FENCE_STATE_SUBMITTED;
+ pthread_cond_broadcast(&device->queue_submit);
}
out:
@@ -1518,7 +1531,11 @@ VkResult anv_CreateFence(
fence->execbuf.rsvd1 = device->context_id;
fence->execbuf.rsvd2 = 0;
- fence->ready = false;
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ } else {
+ fence->state = ANV_FENCE_STATE_RESET;
+ }
*pFence = anv_fence_to_handle(fence);
@@ -1533,6 +1550,9 @@ void anv_DestroyFence(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ if (!fence)
+ return;
+
assert(fence->bo.map == fence);
anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
}
@@ -1544,7 +1564,7 @@ VkResult anv_ResetFences(
{
for (uint32_t i = 0; i < fenceCount; i++) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- fence->ready = false;
+ fence->state = ANV_FENCE_STATE_RESET;
}
return VK_SUCCESS;
@@ -1559,26 +1579,41 @@ VkResult anv_GetFenceStatus(
int64_t t = 0;
int ret;
- if (fence->ready)
- return VK_SUCCESS;
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* If it hasn't even been sent off to the GPU yet, it's not ready */
+ return VK_NOT_READY;
- ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
- if (ret == 0) {
- fence->ready = true;
+ case ANV_FENCE_STATE_SIGNALED:
+ /* It's been signaled, return success */
return VK_SUCCESS;
- }
- return VK_NOT_READY;
+ case ANV_FENCE_STATE_SUBMITTED:
+ /* It's been submitted to the GPU but we don't know if it's done yet. */
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == 0) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ return VK_SUCCESS;
+ } else {
+ return VK_NOT_READY;
+ }
+ default:
+ unreachable("Invalid fence status");
+ }
}
+#define NSEC_PER_SEC 1000000000
+#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
+
VkResult anv_WaitForFences(
VkDevice _device,
uint32_t fenceCount,
const VkFence* pFences,
VkBool32 waitAll,
- uint64_t timeout)
+ uint64_t _timeout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
+ int ret;
/* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
* to block indefinitely timeouts <= 0. Unfortunately, this was broken
@@ -1587,22 +1622,107 @@ VkResult anv_WaitForFences(
* best we can do is to clamp the timeout to INT64_MAX. This limits the
* maximum timeout from 584 years to 292 years - likely not a big deal.
*/
- if (timeout > INT64_MAX)
- timeout = INT64_MAX;
-
- int64_t t = timeout;
+ int64_t timeout = MIN2(_timeout, INT64_MAX);
+
+ uint32_t pending_fences = fenceCount;
+ while (pending_fences) {
+ pending_fences = 0;
+ bool signaled_fences = false;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* This fence hasn't been submitted yet, we'll catch it the next
+ * time around. If no fence gets signaled on this pass, the code
+ * after this loop blocks on the device's queue_submit condition
+ * variable instead of spinning.
+ */
+ pending_fences++;
+ continue;
+
+ case ANV_FENCE_STATE_SIGNALED:
+ /* This fence is not pending. If waitAll isn't set, we can return
+ * early. Otherwise, we have to keep going.
+ */
+ if (!waitAll)
+ return VK_SUCCESS;
+ continue;
+
+ case ANV_FENCE_STATE_SUBMITTED:
+ /* These are the fences we really care about. Go ahead and wait
+ * on it until we hit a timeout.
+ */
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
+ if (ret == -1 && errno == ETIME) {
+ return VK_TIMEOUT;
+ } else if (ret == -1) {
+ /* We don't know the real error. */
+ return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
+ } else {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ signaled_fences = true;
+ if (!waitAll)
+ return VK_SUCCESS;
+ continue;
+ }
+ }
+ }
- /* FIXME: handle !waitAll */
+ if (pending_fences && !signaled_fences) {
+ /* If we've hit this then someone decided to vkWaitForFences before
+ * they've actually submitted any of them to a queue. This is a
+ * fairly pessimal case, so it's ok to lock here and use a standard
+ * pthreads condition variable.
+ */
+ pthread_mutex_lock(&device->mutex);
+
+ /* It's possible that some of the fences have changed state since the
+ * last time we checked. Now that we have the lock, check for
+ * pending fences again and don't wait if it's changed.
+ */
+ uint32_t now_pending_fences = 0;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ if (fence->state == ANV_FENCE_STATE_RESET)
+ now_pending_fences++;
+ }
+ assert(now_pending_fences <= pending_fences);
+
+ if (now_pending_fences == pending_fences) {
+ struct timespec before;
+ clock_gettime(CLOCK_MONOTONIC, &before);
+
+ uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
+ uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
+ (timeout / NSEC_PER_SEC);
+ abs_nsec %= NSEC_PER_SEC;
+
+ /* Avoid roll-over in tv_sec on 32-bit systems if the user
+ * provided timeout is UINT64_MAX
+ */
+ struct timespec abstime;
+ abstime.tv_nsec = abs_nsec;
+ abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
+
+ ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+
+ struct timespec after;
+ clock_gettime(CLOCK_MONOTONIC, &after);
+ uint64_t time_elapsed =
+ ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
+ ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
+
+ if (time_elapsed >= timeout) {
+ pthread_mutex_unlock(&device->mutex);
+ return VK_TIMEOUT;
+ }
+
+ timeout -= time_elapsed;
+ }
- for (uint32_t i = 0; i < fenceCount; i++) {
- ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
- if (ret == -1 && errno == ETIME) {
- return VK_TIMEOUT;
- } else if (ret == -1) {
- /* We don't know the real error. */
- return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
- "gem wait failed: %m");
+ pthread_mutex_unlock(&device->mutex);
}
}
@@ -1672,6 +1792,9 @@ void anv_DestroyEvent(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_event, event, _event);
+ if (!event)
+ return;
+
anv_state_pool_free(&device->dynamic_state_pool, event->state);
}
@@ -1764,6 +1887,9 @@ void anv_DestroyBuffer(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+ if (!buffer)
+ return;
+
vk_free2(&device->alloc, pAllocator, buffer);
}
@@ -1791,6 +1917,9 @@ void anv_DestroySampler(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
+ if (!sampler)
+ return;
+
vk_free2(&device->alloc, pAllocator, sampler);
}
@@ -1835,5 +1964,8 @@ void anv_DestroyFramebuffer(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
+ if (!fb)
+ return;
+
vk_free2(&device->alloc, pAllocator, fb);
}
diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index f6915540fb..875a7d33a4 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -463,6 +463,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties(
uint32_t maxArraySize;
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ if (anv_formats[format].isl_format == ISL_FORMAT_UNSUPPORTED)
+ goto unsupported;
+
anv_physical_device_get_format_properties(physical_device, format,
&format_props);
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index d4ed3256b9..9f843b35b5 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -42,6 +42,8 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
+void genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
+
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index b7c2e991da..4a4d87e6a8 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -275,8 +275,12 @@ anv_DestroyImage(VkDevice _device, VkImage _image,
const VkAllocationCallbacks *pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_image, image, _image);
+
+ if (!image)
+ return;
- vk_free2(&device->alloc, pAllocator, anv_image_from_handle(_image));
+ vk_free2(&device->alloc, pAllocator, image);
}
VkResult anv_BindImageMemory(
@@ -565,6 +569,9 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview,
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image_view, iview, _iview);
+ if (!iview)
+ return;
+
if (iview->color_rt_surface_state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
iview->color_rt_surface_state);
@@ -655,6 +662,9 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer_view, view, bufferView);
+ if (!view)
+ return;
+
if (view->surface_state.alloc_size > 0)
anv_state_pool_free(&device->surface_state_pool,
view->surface_state);
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index 1c50e2bdd3..c356e848fe 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -55,7 +55,7 @@ VkResult anv_CreateDmaBufImageINTEL(
goto fail;
}
- uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;
+ uint64_t size = (uint64_t)pCreateInfo->strideInBytes * pCreateInfo->extent.height;
anv_bo_init(&mem->bo, gem_handle, size);
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 6eaa5c85ca..1f35a42765 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -146,6 +146,9 @@ void anv_DestroyRenderPass(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_render_pass, pass, _pass);
+ if (!pass)
+ return;
+
vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
vk_free2(&device->alloc, pAllocator, pass);
}
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4b8020a4cd..e543c98384 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -75,6 +75,9 @@ void anv_DestroyShaderModule(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_shader_module, module, _module);
+ if (!module)
+ return;
+
vk_free2(&device->alloc, pAllocator, module);
}
@@ -189,6 +192,9 @@ void anv_DestroyPipeline(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+ if (!pipeline)
+ return;
+
anv_reloc_list_finish(&pipeline->batch_relocs,
pAllocator ? pAllocator : &device->alloc);
if (pipeline->blend_state.map)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index ff6e65107e..ddd51dbfd7 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -454,6 +454,9 @@ void anv_DestroyPipelineCache(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+ if (!cache)
+ return;
+
anv_pipeline_cache_finish(cache);
vk_free2(&device->alloc, pAllocator, cache);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 31b47669b0..06cdc0a81f 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -577,6 +577,7 @@ struct anv_device {
uint32_t default_mocs;
pthread_mutex_t mutex;
+ pthread_cond_t queue_submit;
};
void anv_device_get_cache_uuid(void *uuid);
@@ -1251,11 +1252,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
+enum anv_fence_state {
+ /** Indicates that this is a new (or newly reset fence) */
+ ANV_FENCE_STATE_RESET,
+
+ /** Indicates that this fence has been submitted to the GPU but is still
+ * (as far as we know) in use by the GPU.
+ */
+ ANV_FENCE_STATE_SUBMITTED,
+
+ ANV_FENCE_STATE_SIGNALED,
+};
+
struct anv_fence {
struct anv_bo bo;
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 exec2_objects[1];
- bool ready;
+ enum anv_fence_state state;
};
struct anv_event {
diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c
index 4afdaaf367..293257b22f 100644
--- a/src/intel/vulkan/anv_query.c
+++ b/src/intel/vulkan/anv_query.c
@@ -87,6 +87,9 @@ void anv_DestroyQueryPool(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
+ if (!pool)
+ return;
+
anv_gem_munmap(pool->bo.map, pool->bo.size);
anv_gem_close(device, pool->bo.gem_handle);
vk_free2(&device->alloc, pAllocator, pool);
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 064581d5d3..c504658c52 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -76,6 +76,9 @@ void anv_DestroySurfaceKHR(
ANV_FROM_HANDLE(anv_instance, instance, _instance);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+ if (!surface)
+ return;
+
vk_free2(&instance->alloc, pAllocator, surface);
}
@@ -294,6 +297,9 @@ void anv_DestroySwapchainKHR(
ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
const VkAllocationCallbacks *alloc;
+ if (!swapchain)
+ return;
+
if (pAllocator)
alloc = pAllocator;
else
@@ -323,13 +329,20 @@ VkResult anv_AcquireNextImageKHR(
VkSwapchainKHR _swapchain,
uint64_t timeout,
VkSemaphore semaphore,
- VkFence fence,
+ VkFence _fence,
uint32_t* pImageIndex)
{
ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+ VkResult result = swapchain->acquire_next_image(swapchain, timeout,
+ semaphore, pImageIndex);
- return swapchain->acquire_next_image(swapchain, timeout, semaphore,
- pImageIndex);
+ /* Thanks to implicit sync, the image is ready immediately. */
+ if (fence)
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+
+ return result;
}
VkResult anv_QueuePresentKHR(
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 0548a5ee73..f1dfe7b820 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -513,6 +513,25 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
}
}
+/* Set of stage bits which are pipelined, i.e. they get queued by the
+ * command streamer for later execution.
+ */
+#define ANV_PIPELINE_STAGE_PIPELINED_BITS \
+ (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \
+ VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \
+ VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \
+ VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \
+ VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \
+ VK_PIPELINE_STAGE_TRANSFER_BIT | \
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \
+ VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
+
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent _event,
@@ -522,6 +541,11 @@ void genX(CmdSetEvent)(
ANV_FROM_HANDLE(anv_event, event, _event);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+ pc.StallAtPixelScoreboard = true;
+ pc.CommandStreamerStallEnable = true;
+ }
+
pc.DestinationAddressType = DAT_PPGTT,
pc.PostSyncOperation = WriteImmediateData,
pc.Address = (struct anv_address) {
@@ -541,6 +565,11 @@ void genX(CmdResetEvent)(
ANV_FROM_HANDLE(anv_event, event, _event);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+ pc.StallAtPixelScoreboard = true;
+ pc.CommandStreamerStallEnable = true;
+ }
+
pc.DestinationAddressType = DAT_PPGTT;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = (struct anv_address) {
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 185aff6b58..cd9780d6c7 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -164,6 +164,8 @@ genX(blorp_exec)(struct blorp_batch *batch,
genX(flush_pipeline_select_3d)(cmd_buffer);
+ genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
blorp_exec(batch, params);
cmd_buffer->state.vb_dirty = ~0;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 2bc7e7473a..f1b538761c 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1343,12 +1343,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
struct anv_state surfaces = { 0, }, samplers = { 0, };
VkResult result;
- result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
- if (result != VK_SUCCESS)
- return result;
result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS)
- return result;
+ if (result != VK_SUCCESS) {
+ result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+ assert(result == VK_SUCCESS);
+
+ /* Re-emit state base addresses so we get the new surface state base
+ * address before we start emitting binding tables etc.
+ */
+ genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
+
+ result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
+ assert(result == VK_SUCCESS);
+ }
+ result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
+ assert(result == VK_SUCCESS);
+
struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
@@ -1408,8 +1418,20 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
- if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
+ if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) {
+ /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
+ *
+ * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+ * the only bits that are changed are scoreboard related: Scoreboard
+ * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+ * these scoreboard related states, a MEDIA_STATE_FLUSH is
+ * sufficient."
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+ }
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
(cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
@@ -1661,6 +1683,35 @@ genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer)
}
}
+void
+genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
+{
+ if (GEN_GEN >= 8)
+ return;
+
+ /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER:
+ *
+ * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any
+ * combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+ * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+ * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+ * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+ * Depth Flush Bit set, followed by another pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+ * guarantee that the pipeline from WM onwards is already flushed (e.g.,
+ * via a preceding MI_FLUSH)."
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthCacheFlushEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+}
+
static void
cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
@@ -1677,6 +1728,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
/* FIXME: Implement the PMA stall W/A */
/* FIXME: Width and Height are wrong */
+ genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
/* Emit 3DSTATE_DEPTH_BUFFER */
if (has_depth) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {