author     Chih-Wei Huang <cwhuang@linux.org.tw>  2016-11-25 12:05:07 +0800
committer  Chih-Wei Huang <cwhuang@linux.org.tw>  2016-11-25 12:05:07 +0800
commit     524121d42bfdf8c1bd3565bd2adb0ffd7b52713f (patch)
tree       57b645909523126d571949a0cabb16087aca9849 /src/intel/vulkan
parent     5d0d07d402fa0edead26450fb86111292e8f834f (diff)
parent     f7b58a378ca94cf1c2637d640ce5b9fb8f8519a6 (diff)
download   external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.tar.gz
           external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.tar.bz2
           external_mesa3d-524121d42bfdf8c1bd3565bd2adb0ffd7b52713f.zip
Merge remote-tracking branch 'mesa/13.0' into nougat-x86
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r--  src/intel/vulkan/anv_blorp.c          |  24
-rw-r--r--  src/intel/vulkan/anv_cmd_buffer.c     |   6
-rw-r--r--  src/intel/vulkan/anv_descriptor_set.c |  12
-rw-r--r--  src/intel/vulkan/anv_device.c         | 192
-rw-r--r--  src/intel/vulkan/anv_formats.c        |   3
-rw-r--r--  src/intel/vulkan/anv_genX.h           |   2
-rw-r--r--  src/intel/vulkan/anv_image.c          |  12
-rw-r--r--  src/intel/vulkan/anv_intel.c          |   2
-rw-r--r--  src/intel/vulkan/anv_pass.c           |   3
-rw-r--r--  src/intel/vulkan/anv_pipeline.c       |   6
-rw-r--r--  src/intel/vulkan/anv_pipeline_cache.c |   3
-rw-r--r--  src/intel/vulkan/anv_private.h        |  15
-rw-r--r--  src/intel/vulkan/anv_query.c          |   3
-rw-r--r--  src/intel/vulkan/anv_wsi.c            |  19
-rw-r--r--  src/intel/vulkan/gen8_cmd_buffer.c    |  29
-rw-r--r--  src/intel/vulkan/genX_blorp_exec.c    |   2
-rw-r--r--  src/intel/vulkan/genX_cmd_buffer.c    |  65
17 files changed, 345 insertions, 53 deletions
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 87f242cb25..d417469fe7 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -787,7 +787,7 @@ void anv_CmdClearColorImage(
       unsigned base_layer = pRanges[r].baseArrayLayer;
       unsigned layer_count = pRanges[r].layerCount;
 
-      for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
+      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
          const unsigned level = pRanges[r].baseMipLevel + i;
          const unsigned level_width = anv_minify(image->extent.width, level);
          const unsigned level_height = anv_minify(image->extent.height, level);
@@ -847,7 +847,7 @@ void anv_CmdClearDepthStencilImage(
       unsigned base_layer = pRanges[r].baseArrayLayer;
       unsigned layer_count = pRanges[r].layerCount;
 
-      for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
+      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
          const unsigned level = pRanges[r].baseMipLevel + i;
          const unsigned level_width = anv_minify(image->extent.width, level);
          const unsigned level_height = anv_minify(image->extent.height, level);
@@ -1141,15 +1141,6 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
 
-   /* FINISHME(perf): Skip clears for resolve attachments.
-    *
-    * From the Vulkan 1.0 spec:
-    *
-    *    If the first use of an attachment in a render pass is as a resolve
-    *    attachment, then the loadOp is effectively ignored as the resolve is
-    *    guaranteed to overwrite all pixels in the render area.
-    */
-
    if (!subpass->has_resolve)
       return;
 
@@ -1163,6 +1154,17 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
       if (dst_att == VK_ATTACHMENT_UNUSED)
          continue;
 
+      if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
+         /* From the Vulkan 1.0 spec:
+          *
+          *    If the first use of an attachment in a render pass is as a
+          *    resolve attachment, then the loadOp is effectively ignored
+          *    as the resolve is guaranteed to overwrite all pixels in the
+          *    render area.
+          */
+         cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
+      }
+
       struct anv_image_view *src_iview = fb->attachments[src_att];
       struct anv_image_view *dst_iview = fb->attachments[dst_att];
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 7ff7dbabc7..44ae67d019 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -318,6 +318,9 @@ void anv_FreeCommandBuffers(
    for (uint32_t i = 0; i < commandBufferCount; i++) {
       ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
 
+      if (!cmd_buffer)
+         continue;
+
       anv_cmd_buffer_destroy(cmd_buffer);
    }
 }
@@ -796,6 +799,9 @@ void anv_DestroyCommandPool(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
 
+   if (!pool)
+      return;
+
    list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
                             &pool->cmd_buffers, pool_link) {
       anv_cmd_buffer_destroy(cmd_buffer);
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 7d5a78d018..17a1c8ead7 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -200,6 +200,9 @@ void anv_DestroyDescriptorSetLayout(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout);
 
+   if (!set_layout)
+      return;
+
    vk_free2(&device->alloc, pAllocator, set_layout);
 }
 
@@ -282,6 +285,9 @@ void anv_DestroyPipelineLayout(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);
 
+   if (!pipeline_layout)
+      return;
+
    vk_free2(&device->alloc, pAllocator, pipeline_layout);
 }
 
@@ -355,6 +361,9 @@ void anv_DestroyDescriptorPool(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool);
 
+   if (!pool)
+      return;
+
    anv_state_stream_finish(&pool->surface_state_stream);
    vk_free2(&device->alloc, pAllocator, pool);
 }
@@ -546,6 +555,9 @@ VkResult anv_FreeDescriptorSets(
    for (uint32_t i = 0; i < count; i++) {
       ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
 
+      if (!set)
+         continue;
+
       anv_descriptor_set_destroy(device, pool, set);
    }
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index e83887c36e..125df22d85 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -896,6 +896,12 @@ VkResult anv_CreateDevice(
 
    pthread_mutex_init(&device->mutex, NULL);
 
+   pthread_condattr_t condattr;
+   pthread_condattr_init(&condattr);
+   pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+   pthread_cond_init(&device->queue_submit, NULL);
+   pthread_condattr_destroy(&condattr);
+
    anv_bo_pool_init(&device->batch_bo_pool, device);
 
    anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
@@ -1001,10 +1007,11 @@ VkResult anv_EnumerateInstanceExtensionProperties(
       return VK_SUCCESS;
    }
 
-   assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
+   *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+   typed_memcpy(pProperties, global_extensions, *pPropertyCount);
 
-   *pPropertyCount = ARRAY_SIZE(global_extensions);
-   memcpy(pProperties, global_extensions, sizeof(global_extensions));
+   if (*pPropertyCount < ARRAY_SIZE(global_extensions))
+      return VK_INCOMPLETE;
 
    return VK_SUCCESS;
 }
@@ -1020,10 +1027,11 @@ VkResult anv_EnumerateDeviceExtensionProperties(
       return VK_SUCCESS;
    }
 
-   assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
+   *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+   typed_memcpy(pProperties, device_extensions, *pPropertyCount);
 
-   *pPropertyCount = ARRAY_SIZE(device_extensions);
-   memcpy(pProperties, device_extensions, sizeof(device_extensions));
+   if (*pPropertyCount < ARRAY_SIZE(device_extensions))
+      return VK_INCOMPLETE;
 
    return VK_SUCCESS;
 }
@@ -1141,6 +1149,11 @@ VkResult anv_QueueSubmit(
       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
       if (result != VK_SUCCESS)
          goto out;
+
+      /* Update the fence and wake up any waiters */
+      assert(fence->state == ANV_FENCE_STATE_RESET);
+      fence->state = ANV_FENCE_STATE_SUBMITTED;
+      pthread_cond_broadcast(&device->queue_submit);
    }
 
 out:
@@ -1518,7 +1531,11 @@ VkResult anv_CreateFence(
    fence->execbuf.rsvd1 = device->context_id;
    fence->execbuf.rsvd2 = 0;
 
-   fence->ready = false;
+   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+      fence->state = ANV_FENCE_STATE_SIGNALED;
+   } else {
+      fence->state = ANV_FENCE_STATE_RESET;
+   }
 
    *pFence = anv_fence_to_handle(fence);
@@ -1533,6 +1550,9 @@ void anv_DestroyFence(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_fence, fence, _fence);
 
+   if (!fence)
+      return;
+
    assert(fence->bo.map == fence);
    anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
 }
@@ -1544,7 +1564,7 @@ VkResult anv_ResetFences(
 {
    for (uint32_t i = 0; i < fenceCount; i++) {
       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      fence->ready = false;
+      fence->state = ANV_FENCE_STATE_RESET;
    }
 
    return VK_SUCCESS;
@@ -1559,26 +1579,41 @@ VkResult anv_GetFenceStatus(
    int64_t t = 0;
    int ret;
 
-   if (fence->ready)
-      return VK_SUCCESS;
+   switch (fence->state) {
+   case ANV_FENCE_STATE_RESET:
+      /* If it hasn't even been sent off to the GPU yet, it's not ready */
+      return VK_NOT_READY;
 
-   ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
-   if (ret == 0) {
-      fence->ready = true;
+   case ANV_FENCE_STATE_SIGNALED:
+      /* It's been signaled, return success */
       return VK_SUCCESS;
-   }
 
-   return VK_NOT_READY;
+   case ANV_FENCE_STATE_SUBMITTED:
+      /* It's been submitted to the GPU but we don't know if it's done yet. */
+      ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+      if (ret == 0) {
+         fence->state = ANV_FENCE_STATE_SIGNALED;
+         return VK_SUCCESS;
+      } else {
+         return VK_NOT_READY;
+      }
+   default:
+      unreachable("Invalid fence status");
+   }
 }
 
+#define NSEC_PER_SEC 1000000000
+#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
+
 VkResult anv_WaitForFences(
     VkDevice _device,
     uint32_t fenceCount,
     const VkFence* pFences,
     VkBool32 waitAll,
-    uint64_t timeout)
+    uint64_t _timeout)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   int ret;
 
    /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
     * to block indefinitely timeouts <= 0.  Unfortunately, this was broken
    *
@@ -1587,22 +1622,107 @@ VkResult anv_WaitForFences(
    * best we can do is to clamp the timeout to INT64_MAX.  This limits the
    * maximum timeout from 584 years to 292 years - likely not a big deal.
    */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   int64_t t = timeout;
+   int64_t timeout = MIN2(_timeout, INT64_MAX);
+
+   uint32_t pending_fences = fenceCount;
+   while (pending_fences) {
+      pending_fences = 0;
+      bool signaled_fences = false;
+      for (uint32_t i = 0; i < fenceCount; i++) {
+         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+         switch (fence->state) {
+         case ANV_FENCE_STATE_RESET:
+            /* This fence hasn't been submitted yet, we'll catch it the next
+             * time around.  Yes, this may mean we dead-loop but, short of
+             * lots of locking and a condition variable, there's not much that
+             * we can do about that.
+             */
+            pending_fences++;
+            continue;
+
+         case ANV_FENCE_STATE_SIGNALED:
+            /* This fence is not pending.  If waitAll isn't set, we can return
+             * early.  Otherwise, we have to keep going.
+             */
+            if (!waitAll)
+               return VK_SUCCESS;
+            continue;
+
+         case ANV_FENCE_STATE_SUBMITTED:
+            /* These are the fences we really care about.  Go ahead and wait
+             * on it until we hit a timeout.
+             */
+            ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
+            if (ret == -1 && errno == ETIME) {
+               return VK_TIMEOUT;
+            } else if (ret == -1) {
+               /* We don't know the real error. */
+               return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
+            } else {
+               fence->state = ANV_FENCE_STATE_SIGNALED;
+               signaled_fences = true;
+               if (!waitAll)
+                  return VK_SUCCESS;
+               continue;
+            }
+         }
+      }
 
-   /* FIXME: handle !waitAll */
+      if (pending_fences && !signaled_fences) {
+         /* If we've hit this then someone decided to vkWaitForFences before
+          * they've actually submitted any of them to a queue.  This is a
+          * fairly pessimal case, so it's ok to lock here and use a standard
+          * pthreads condition variable.
+          */
+         pthread_mutex_lock(&device->mutex);
+
+         /* It's possible that some of the fences have changed state since the
+          * last time we checked.  Now that we have the lock, check for
+          * pending fences again and don't wait if it's changed.
+          */
+         uint32_t now_pending_fences = 0;
+         for (uint32_t i = 0; i < fenceCount; i++) {
+            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+            if (fence->state == ANV_FENCE_STATE_RESET)
+               now_pending_fences++;
+         }
+         assert(now_pending_fences <= pending_fences);
+
+         if (now_pending_fences == pending_fences) {
+            struct timespec before;
+            clock_gettime(CLOCK_MONOTONIC, &before);
+
+            uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
+            uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
+                               (timeout / NSEC_PER_SEC);
+            abs_nsec %= NSEC_PER_SEC;
+
+            /* Avoid roll-over in tv_sec on 32-bit systems if the user
+             * provided timeout is UINT64_MAX
+             */
+            struct timespec abstime;
+            abstime.tv_nsec = abs_nsec;
+            abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
+
+            ret = pthread_cond_timedwait(&device->queue_submit,
+                                         &device->mutex, &abstime);
+            assert(ret != EINVAL);
+
+            struct timespec after;
+            clock_gettime(CLOCK_MONOTONIC, &after);
+            uint64_t time_elapsed =
+               ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
+               ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
+
+            if (time_elapsed >= timeout) {
+               pthread_mutex_unlock(&device->mutex);
+               return VK_TIMEOUT;
+            }
+
+            timeout -= time_elapsed;
+         }
 
-   for (uint32_t i = 0; i < fenceCount; i++) {
-      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
-      if (ret == -1 && errno == ETIME) {
-         return VK_TIMEOUT;
-      } else if (ret == -1) {
-         /* We don't know the real error. */
-         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "gem wait failed: %m");
+         pthread_mutex_unlock(&device->mutex);
       }
    }
@@ -1672,6 +1792,9 @@ void anv_DestroyEvent(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
+   if (!event)
+      return;
+
    anv_state_pool_free(&device->dynamic_state_pool, event->state);
 }
@@ -1764,6 +1887,9 @@ void anv_DestroyBuffer(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 
+   if (!buffer)
+      return;
+
    vk_free2(&device->alloc, pAllocator, buffer);
 }
@@ -1791,6 +1917,9 @@ void anv_DestroySampler(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
 
+   if (!sampler)
+      return;
+
    vk_free2(&device->alloc, pAllocator, sampler);
 }
@@ -1835,5 +1964,8 @@ void anv_DestroyFramebuffer(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
 
+   if (!fb)
+      return;
+
    vk_free2(&device->alloc, pAllocator, fb);
 }
diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index f6915540fb..875a7d33a4 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -463,6 +463,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties(
    uint32_t maxArraySize;
    VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
 
+   if (anv_formats[format].isl_format == ISL_FORMAT_UNSUPPORTED)
+      goto unsupported;
+
    anv_physical_device_get_format_properties(physical_device, format,
                                              &format_props);
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index d4ed3256b9..9f843b35b5 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -42,6 +42,8 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 
 void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
 
+void genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
+
 void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
 void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index b7c2e991da..4a4d87e6a8 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -275,8 +275,12 @@ anv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_image, image, _image);
+
+   if (!image)
+      return;
 
-   vk_free2(&device->alloc, pAllocator, anv_image_from_handle(_image));
+   vk_free2(&device->alloc, pAllocator, image);
 }
 
 VkResult anv_BindImageMemory(
@@ -565,6 +569,9 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview,
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_image_view, iview, _iview);
 
+   if (!iview)
+      return;
+
    if (iview->color_rt_surface_state.alloc_size > 0) {
       anv_state_pool_free(&device->surface_state_pool,
                           iview->color_rt_surface_state);
@@ -655,6 +662,9 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_buffer_view, view, bufferView);
 
+   if (!view)
+      return;
+
    if (view->surface_state.alloc_size > 0)
       anv_state_pool_free(&device->surface_state_pool,
                           view->surface_state);
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index 1c50e2bdd3..c356e848fe 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -55,7 +55,7 @@ VkResult anv_CreateDmaBufImageINTEL(
       goto fail;
    }
 
-   uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;
+   uint64_t size = (uint64_t)pCreateInfo->strideInBytes * pCreateInfo->extent.height;
 
    anv_bo_init(&mem->bo, gem_handle, size);
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 6eaa5c85ca..1f35a42765 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -146,6 +146,9 @@ void anv_DestroyRenderPass(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_render_pass, pass, _pass);
 
+   if (!pass)
+      return;
+
    vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
    vk_free2(&device->alloc, pAllocator, pass);
 }
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4b8020a4cd..e543c98384 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -75,6 +75,9 @@ void anv_DestroyShaderModule(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_shader_module, module, _module);
 
+   if (!module)
+      return;
+
    vk_free2(&device->alloc, pAllocator, module);
 }
@@ -189,6 +192,9 @@ void anv_DestroyPipeline(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 
+   if (!pipeline)
+      return;
+
    anv_reloc_list_finish(&pipeline->batch_relocs,
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index ff6e65107e..ddd51dbfd7 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -454,6 +454,9 @@ void anv_DestroyPipelineCache(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
+   if (!cache)
+      return;
+
    anv_pipeline_cache_finish(cache);
 
    vk_free2(&device->alloc, pAllocator, cache);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 31b47669b0..06cdc0a81f 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -577,6 +577,7 @@ struct anv_device {
    uint32_t default_mocs;
 
    pthread_mutex_t mutex;
+   pthread_cond_t queue_submit;
 };
 
 void anv_device_get_cache_uuid(void *uuid);
@@ -1251,11 +1252,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
 
+enum anv_fence_state {
+   /** Indicates that this is a new (or newly reset fence) */
+   ANV_FENCE_STATE_RESET,
+
+   /** Indicates that this fence has been submitted to the GPU but is still
+    * (as far as we know) in use by the GPU.
+    */
+   ANV_FENCE_STATE_SUBMITTED,
+
+   ANV_FENCE_STATE_SIGNALED,
+};
+
 struct anv_fence {
    struct anv_bo bo;
    struct drm_i915_gem_execbuffer2 execbuf;
    struct drm_i915_gem_exec_object2 exec2_objects[1];
-   bool ready;
+   enum anv_fence_state state;
 };
 
 struct anv_event {
diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c
index 4afdaaf367..293257b22f 100644
--- a/src/intel/vulkan/anv_query.c
+++ b/src/intel/vulkan/anv_query.c
@@ -87,6 +87,9 @@ void anv_DestroyQueryPool(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
 
+   if (!pool)
+      return;
+
    anv_gem_munmap(pool->bo.map, pool->bo.size);
    anv_gem_close(device, pool->bo.gem_handle);
    vk_free2(&device->alloc, pAllocator, pool);
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 064581d5d3..c504658c52 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -76,6 +76,9 @@ void anv_DestroySurfaceKHR(
    ANV_FROM_HANDLE(anv_instance, instance, _instance);
    ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
 
+   if (!surface)
+      return;
+
    vk_free2(&instance->alloc, pAllocator, surface);
 }
@@ -294,6 +297,9 @@ void anv_DestroySwapchainKHR(
    ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
    const VkAllocationCallbacks *alloc;
 
+   if (!swapchain)
+      return;
+
    if (pAllocator)
       alloc = pAllocator;
    else
@@ -323,13 +329,20 @@ VkResult anv_AcquireNextImageKHR(
     VkSwapchainKHR _swapchain,
     uint64_t timeout,
     VkSemaphore semaphore,
-    VkFence fence,
+    VkFence _fence,
     uint32_t* pImageIndex)
 {
    ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+   VkResult result = swapchain->acquire_next_image(swapchain, timeout,
+                                                   semaphore, pImageIndex);
 
-   return swapchain->acquire_next_image(swapchain, timeout, semaphore,
-                                        pImageIndex);
+   /* Thanks to implicit sync, the image is ready immediately. */
+   if (fence)
+      fence->state = ANV_FENCE_STATE_SIGNALED;
+
+   return result;
 }
 
 VkResult anv_QueuePresentKHR(
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 0548a5ee73..f1dfe7b820 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -513,6 +513,25 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+/* Set of stage bits for which are pipelined, i.e. they get queued by the
+ * command streamer for later execution.
+ */
+#define ANV_PIPELINE_STAGE_PIPELINED_BITS \
+   (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \
+    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \
+    VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \
+    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \
+    VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \
+    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \
+    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \
+    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \
+    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \
+    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \
+    VK_PIPELINE_STAGE_TRANSFER_BIT | \
+    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \
+    VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \
+    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
+
 void genX(CmdSetEvent)(
     VkCommandBuffer commandBuffer,
     VkEvent _event,
@@ -522,6 +541,11 @@ void genX(CmdSetEvent)(
    ANV_FROM_HANDLE(anv_event, event, _event);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+         pc.StallAtPixelScoreboard = true;
+         pc.CommandStreamerStallEnable = true;
+      }
+
       pc.DestinationAddressType  = DAT_PPGTT,
       pc.PostSyncOperation       = WriteImmediateData,
       pc.Address = (struct anv_address) {
@@ -541,6 +565,11 @@ void genX(CmdResetEvent)(
    ANV_FROM_HANDLE(anv_event, event, _event);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+         pc.StallAtPixelScoreboard = true;
+         pc.CommandStreamerStallEnable = true;
+      }
+
       pc.DestinationAddressType  = DAT_PPGTT;
       pc.PostSyncOperation       = WriteImmediateData;
       pc.Address = (struct anv_address) {
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 185aff6b58..cd9780d6c7 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -164,6 +164,8 @@ genX(blorp_exec)(struct blorp_batch *batch,
 
    genX(flush_pipeline_select_3d)(cmd_buffer);
 
+   genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
    blorp_exec(batch, params);
 
    cmd_buffer->state.vb_dirty = ~0;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 2bc7e7473a..f1b538761c 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1343,12 +1343,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    struct anv_state surfaces = { 0, }, samplers = { 0, };
    VkResult result;
 
-   result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
-   if (result != VK_SUCCESS)
-      return result;
    result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
-   if (result != VK_SUCCESS)
-      return result;
+   if (result != VK_SUCCESS) {
+      result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+      assert(result == VK_SUCCESS);
+
+      /* Re-emit state base addresses so we get the new surface state base
+       * address before we start emitting binding tables etc.
+       */
+      genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
+
+      result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
+      assert(result == VK_SUCCESS);
+   }
+
+   result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
+   assert(result == VK_SUCCESS);
 
    struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
@@ -1408,8 +1418,20 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 
    genX(flush_pipeline_select_gpgpu)(cmd_buffer);
 
-   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
+   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) {
+      /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
+       *
+       *    "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+       *    the only bits that are changed are scoreboard related: Scoreboard
+       *    Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta.  For
+       *    these scoreboard related states, a MEDIA_STATE_FLUSH is
+       *    sufficient."
+       */
+      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+   }
 
    if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
        (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
@@ -1661,6 +1683,35 @@ genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer)
    }
 }
 
+void
+genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
+{
+   if (GEN_GEN >= 8)
+      return;
+
+   /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER:
+    *
+    *    "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any
+    *    combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+    *    3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+    *    issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+    *    set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+    *    Depth Flush Bit set, followed by another pipelined depth stall
+    *    (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+    *    guarantee that the pipeline from WM onwards is already flushed (e.g.,
+    *    via a preceding MI_FLUSH)."
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+      pipe.DepthStallEnable = true;
+   }
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+      pipe.DepthCacheFlushEnable = true;
+   }
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+      pipe.DepthStallEnable = true;
+   }
+}
+
 static void
 cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -1677,6 +1728,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
    /* FIXME: Implement the PMA stall W/A */
    /* FIXME: Width and Height are wrong */
 
+   genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
    /* Emit 3DSTATE_DEPTH_BUFFER */
    if (has_depth) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
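
The two VK_INCOMPLETE hunks in anv_device.c above replace an assert with Vulkan's standard two-call enumeration idiom: clamp the copy to the caller's count and report VK_INCOMPLETE if the buffer was too small. A minimal caller-side sketch of that idiom (hypothetical application code, not part of the patch):

/* First call queries the count; second call fills a buffer that may be
 * too small, in which case the driver now clamps the copy and returns
 * VK_INCOMPLETE instead of tripping an assert. */
#include <stdio.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>

int main(void)
{
   uint32_t count = 0;
   vkEnumerateInstanceExtensionProperties(NULL, &count, NULL);

   VkExtensionProperties *props = malloc(count * sizeof(*props));
   if (!props)
      return 1;

   /* Pretend the application under-sized its buffer by one entry. */
   uint32_t asked = count > 0 ? count - 1 : 0;
   VkResult result =
      vkEnumerateInstanceExtensionProperties(NULL, &asked, props);

   /* With the patch, 'asked' entries are valid and result is
    * VK_INCOMPLETE; before it, an under-sized buffer was asserted away. */
   printf("got %u of %u extensions, result = %d\n", asked, count, result);

   free(props);
   return 0;
}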
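The fence rework threads a small state machine (ANV_FENCE_STATE_RESET -> SUBMITTED -> SIGNALED) through anv_device.c, anv_wsi.c, and anv_private.h. A sketch of the API-level behavior this enables, assuming a valid VkDevice named device (illustrative code, not from the patch):

#include <vulkan/vulkan.h>

VkResult demo_fence_states(VkDevice device)
{
   /* VK_FENCE_CREATE_SIGNALED_BIT now maps to ANV_FENCE_STATE_SIGNALED. */
   const VkFenceCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
      .flags = VK_FENCE_CREATE_SIGNALED_BIT,
   };

   VkFence fence;
   VkResult result = vkCreateFence(device, &info, NULL, &fence);
   if (result != VK_SUCCESS)
      return result;

   /* Signaled fence: waiting returns at once, even with a zero timeout. */
   result = vkWaitForFences(device, 1, &fence, VK_TRUE, 0);
   /* result == VK_SUCCESS */

   /* Back to ANV_FENCE_STATE_RESET: the status query no longer asks the
    * GPU about a fence that was never submitted; it simply reports
    * VK_NOT_READY until a vkQueueSubmit moves it to SUBMITTED. */
   vkResetFences(device, 1, &fence);
   result = vkGetFenceStatus(device, fence);
   /* result == VK_NOT_READY */

   vkDestroyFence(device, fence, NULL);
   return VK_SUCCESS;
}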
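The pthread_cond_timedwait path in anv_WaitForFences converts the caller's relative nanosecond timeout into an absolute CLOCK_MONOTONIC timespec, clamping tv_sec so a UINT64_MAX timeout cannot overflow a 32-bit time_t. Note in passing that the anv_CreateDevice hunk sets up a CLOCK_MONOTONIC condattr but, as merged here, passes NULL to pthread_cond_init rather than the attribute. The standalone sketch below (names borrowed from the patch, otherwise hypothetical) shows the conversion and why the clock choice matters:

#include <pthread.h>
#include <stdint.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000
#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

static int
wait_with_timeout(pthread_cond_t *cond, pthread_mutex_t *mutex,
                  uint64_t timeout_ns)
{
   struct timespec before;
   clock_gettime(CLOCK_MONOTONIC, &before);

   /* Split the relative timeout into seconds and nanoseconds, carrying
    * any nanosecond overflow into the seconds field. */
   uint32_t abs_nsec = before.tv_nsec + timeout_ns % NSEC_PER_SEC;
   uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
                      (timeout_ns / NSEC_PER_SEC);
   abs_nsec %= NSEC_PER_SEC;

   /* Clamp instead of letting tv_sec roll over on 32-bit systems. */
   struct timespec abstime;
   abstime.tv_nsec = abs_nsec;
   abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));

   /* The absolute deadline is only interpreted against CLOCK_MONOTONIC if
    * the condvar was created with pthread_condattr_setclock(...,
    * CLOCK_MONOTONIC); with default attributes it is measured against
    * CLOCK_REALTIME, so wall-clock jumps would distort the wait. */
   return pthread_cond_timedwait(cond, mutex, &abstime);
}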