summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChih-Wei Huang <cwhuang@linux.org.tw>2017-01-08 16:30:27 (GMT)
committerChih-Wei Huang <cwhuang@linux.org.tw>2017-01-08 16:30:27 (GMT)
commite123ad753c152491c07a5cc6d346f62624b07949 (patch)
tree9095413f68d7d91a7dfc1b18ead20ef5616b43d6
parentdffabc025cca524fecb7a03d0f65ec9c628025f8 (diff)
parentc8ece92ded9337b9ed60aa9568b41313025a1406 (diff)
downloadexternal_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.zip
external_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.tar.gz
external_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.tar.bz2
Merge remote-tracking branch 'mesa/13.0' into nougat-x86
-rw-r--r--VERSION2
-rw-r--r--docs/relnotes/13.0.3.html177
-rw-r--r--src/amd/common/ac_nir_to_llvm.c2
-rw-r--r--src/amd/vulkan/radv_device.c3
-rw-r--r--src/amd/vulkan/radv_meta_bufimage.c27
-rw-r--r--src/compiler/Makefile.glsl.am5
-rw-r--r--src/compiler/glsl/builtin_functions.cpp12
-rw-r--r--src/compiler/glsl/link_uniforms.cpp2
-rw-r--r--src/compiler/glsl/linker.cpp1
-rw-r--r--src/compiler/nir/nir_opt_undef.c4
-rw-r--r--src/compiler/spirv/vtn_glsl450.c23
-rw-r--r--src/egl/main/eglapi.c4
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c19
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c5
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c183
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c25
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c24
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c7
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c27
-rw-r--r--src/intel/genxml/gen9.xml2
-rw-r--r--src/intel/vulkan/anv_allocator.c27
-rw-r--r--src/intel/vulkan/anv_descriptor_set.c1
-rw-r--r--src/intel/vulkan/anv_device.c52
-rw-r--r--src/intel/vulkan/anv_gem.c6
-rw-r--r--src/intel/vulkan/anv_image.c16
-rw-r--r--src/intel/vulkan/anv_private.h13
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c36
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am7
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp19
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c2
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp213
-rw-r--r--src/mesa/main/api_validate.c2
-rw-r--r--src/mesa/main/fbobject.c1
-rw-r--r--src/mesa/main/program_resource.c111
-rw-r--r--src/vulkan/wsi/wsi_common_queue.h1
43 files changed, 814 insertions, 285 deletions
diff --git a/VERSION b/VERSION
index 347caf3..2cb4f2f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-13.0.2
+13.0.3
diff --git a/docs/relnotes/13.0.3.html b/docs/relnotes/13.0.3.html
new file mode 100644
index 0000000..59bc47d
--- /dev/null
+++ b/docs/relnotes/13.0.3.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4. OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2 mesa-13.0.3.tar.gz
+d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e mesa-13.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+ <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+ <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+ <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+ <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+ <li>radv: fix another regression since shadow fixes.</li>
+ <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+ <li>docs: add sha256 checksums for 13.0.2</li>
+ <li>anv: don't double-close the same fd</li>
+ <li>anv: don't leak memory if anv_init_wsi() fails</li>
+ <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+ <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+ <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+ <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+ <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+ <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+ <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+ <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+ <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+ <li>anv/image: Rename hiz_surface to aux_surface</li>
+ <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+ <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+ <li>anv/device: Return the right error for failed maps</li>
+ <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+ <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+ <li>spirv: Use a simpler and more correct implementaiton of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+ <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+ <li>radeonsi: always set all blend registers</li>
+ <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+ <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+ <li>radeonsi: consolidate max-work-group-size computation</li>
+ <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+ <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+ <li>radeonsi: apply a tessellation bug workaround for SI</li>
+ <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+ <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+ <li>cso: don't release sampler states that are bound</li>
+ <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+ <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+ <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+ <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+ <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+ <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+ <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+ <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+ <li>i965/fs: Add unit tests for copy propagation pass.</li>
+ <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+ <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+ <li>radeonsi: fix isolines tess factor writes to control ring</li>
+ <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+ <li>radeonsi: do not kill GS with memory writes</li>
+ <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+ <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+ <li>mesa: fix active subroutine uniforms properly</li>
+ <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 0daef08..ccf10ac 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3545,7 +3545,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
if (instr->op == nir_texop_query_levels)
result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
- else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod)
+ else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
else if (instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 94a2ef0..86d5777 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -91,6 +91,7 @@ radv_physical_device_init(struct radv_physical_device *device,
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
device->name = device->rad_info.name;
+ close(fd);
return VK_SUCCESS;
fail:
@@ -424,7 +425,7 @@ void radv_GetPhysicalDeviceProperties(
.maxGeometryTotalOutputComponents = 1024,
.maxFragmentInputComponents = 128,
.maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 2,
+ .maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 8,
.maxComputeSharedMemorySize = 32768,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index 287ab3f..a6204c4 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -1,6 +1,33 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
#include "radv_meta.h"
#include "nir/nir_builder.h"
+/*
+ * Compute shader implementation of image->buffer copy.
+ */
+
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev)
{
diff --git a/src/compiler/Makefile.glsl.am b/src/compiler/Makefile.glsl.am
index 80dfb73..15bea6b 100644
--- a/src/compiler/Makefile.glsl.am
+++ b/src/compiler/Makefile.glsl.am
@@ -62,8 +62,11 @@ glsl_tests_blob_test_LDADD = \
glsl_tests_cache_test_SOURCES = \
glsl/tests/cache_test.c
+glsl_tests_cache_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
glsl_tests_cache_test_LDADD = \
- glsl/libglsl.la
+ glsl/libglsl.la \
+ $(PTHREAD_LIBS)
glsl_tests_general_ir_test_SOURCES = \
glsl/tests/builtin_variable_test.cpp \
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index 3e4bcbb..3dead1a 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3563,9 +3563,17 @@ builtin_builder::_tanh(const glsl_type *type)
ir_variable *x = in_var(type, "x");
MAKE_SIG(type, v130, 1, x);
+ /* Clamp x to [-10, +10] to avoid precision problems.
+ * When x > 10, e^(-x) is so small relative to e^x that it gets flushed to
+ * zero in the computation e^x + e^(-x). The same happens in the other
+ * direction when x < -10.
+ */
+ ir_variable *t = body.make_temp(type, "tmp");
+ body.emit(assign(t, min2(max2(x, imm(-10.0f)), imm(10.0f))));
+
/* (e^x - e^(-x)) / (e^x + e^(-x)) */
- body.emit(ret(div(sub(exp(x), exp(neg(x))),
- add(exp(x), exp(neg(x))))));
+ body.emit(ret(div(sub(exp(t), exp(neg(t))),
+ add(exp(t), exp(neg(t))))));
return sig;
}
diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp
index b3c3c5a..8529b74 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -633,6 +633,8 @@ private:
uniform->opaque[shader_type].index = this->next_subroutine;
uniform->opaque[shader_type].active = true;
+ prog->_LinkedShaders[shader_type]->NumSubroutineUniforms++;
+
/* Increment the subroutine index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index f62a848..b71c51e 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3118,7 +3118,6 @@ link_calculate_subroutine_compat(struct gl_shader_program *prog)
if (!uni)
continue;
- sh->NumSubroutineUniforms++;
count = 0;
if (sh->NumSubroutineFunctions == 0) {
linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name);
diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c
index 0f8ba31..c4777a8 100644
--- a/src/compiler/nir/nir_opt_undef.c
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -79,9 +79,7 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
{
if (alu->op != nir_op_vec2 &&
alu->op != nir_op_vec3 &&
- alu->op != nir_op_vec4 &&
- alu->op != nir_op_fmov &&
- alu->op != nir_op_imov)
+ alu->op != nir_op_vec4)
return false;
assert(alu->dest.dest.is_ssa);
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index cb0570d..fbc7ce6 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -565,16 +565,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
build_exp(nb, nir_fneg(nb, src[0]))));
return;
- case GLSLstd450Tanh:
- /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
- val->ssa->def =
- nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
- nir_fsub(nb, build_exp(nb, src[0]),
- build_exp(nb, nir_fneg(nb, src[0])))),
- nir_fmul(nb, nir_imm_float(nb, 0.5f),
- nir_fadd(nb, build_exp(nb, src[0]),
- build_exp(nb, nir_fneg(nb, src[0])))));
+ case GLSLstd450Tanh: {
+ /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
+ *
+ * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
+ *
+ * We clamp x to (-inf, +10] to avoid precision problems. When x > 10,
+ * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
+ * computation e^2x +/- 1 so it can be ignored.
+ */
+ nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
+ nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
+ val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
+ nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
return;
+ }
case GLSLstd450Asinh:
val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 697b6fe..471cf7e 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -849,7 +849,7 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);
#ifdef HAVE_SURFACELESS_PLATFORM
- if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+ if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
/* From the EGL_MESA_platform_surfaceless spec (v1):
*
* eglCreatePlatformWindowSurface fails when called with a <display>
@@ -970,7 +970,7 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
EGLSurface ret;
#if HAVE_SURFACELESS_PLATFORM
- if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+ if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
/* From the EGL_MESA_platform_surfaceless spec (v1):
*
* [Like eglCreatePlatformWindowSurface,] eglCreatePlatformPixmapSurface
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index b240c93..1f3be4b 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -188,7 +188,9 @@ cso_insert_state(struct cso_cache *sc,
void *state)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
- sanitize_hash(sc, hash, type, sc->max_size);
+
+ if (type != CSO_SAMPLER)
+ sanitize_hash(sc, hash, type, sc->max_size);
return cso_hash_insert(hash, hash_key, state);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 18e1bc8..37549aa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -485,6 +485,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_MEMBAR:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3dbcbc6..f62bbf2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -85,7 +85,8 @@ void r600_gfx_write_fence(struct r600_common_context *ctx, struct r600_resource
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- if (ctx->chip_class == CIK) {
+ if (ctx->chip_class == CIK ||
+ ctx->chip_class == VI) {
/* Two EOP events are required to make all engines go idle
* (and optional cache flushes executed) before the timestamp
* is written.
@@ -114,7 +115,8 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
{
unsigned dwords = 6;
- if (screen->chip_class == CIK)
+ if (screen->chip_class == CIK ||
+ screen->chip_class == VI)
dwords *= 2;
if (!screen->info.has_virtual_memory)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index a35187c..0845711 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -343,6 +343,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
lds_blocks += align(program->local_size, 512) >> 9;
}
+ /* TODO: use si_multiwave_lds_size_workaround */
assert(lds_blocks <= 0xFF);
config->rsrc2 &= C_00B84C_LDS_SIZE;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 19cae65..5ec9881 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -413,13 +413,13 @@ static void si_set_sampler_view(struct si_context *sctx,
struct si_sampler_views *views = &sctx->samplers[shader].views;
struct si_sampler_view *rview = (struct si_sampler_view*)view;
struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
+ uint32_t *desc = descs->list + slot * 16;
if (views->views[slot] == view && !disallow_early_out)
return;
if (view) {
struct r600_texture *rtex = (struct r600_texture *)view->texture;
- uint32_t *desc = descs->list + slot * 16;
assert(rtex); /* views with texture == NULL aren't supported */
pipe_sampler_view_reference(&views->views[slot], view);
@@ -468,9 +468,14 @@ static void si_set_sampler_view(struct si_context *sctx,
rview->is_stencil_sampler, true);
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- memcpy(descs->list + slot*16, null_texture_descriptor, 8*4);
+ memcpy(desc, null_texture_descriptor, 8*4);
/* Only clear the lower dwords of FMASK. */
- memcpy(descs->list + slot*16 + 8, null_texture_descriptor, 4*4);
+ memcpy(desc + 8, null_texture_descriptor, 4*4);
+ /* Re-set the sampler state if we are transitioning from FMASK. */
+ if (views->sampler_states[slot])
+ memcpy(desc + 12,
+ views->sampler_states[slot], 4*4);
+
views->enabled_mask &= ~(1u << slot);
}
@@ -803,10 +808,10 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
/* If FMASK is bound, don't overwrite it.
* The sampler state will be set after FMASK is unbound.
*/
- if (samplers->views.views[i] &&
- samplers->views.views[i]->texture &&
- samplers->views.views[i]->texture->target != PIPE_BUFFER &&
- ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
+ if (samplers->views.views[slot] &&
+ samplers->views.views[slot]->texture &&
+ samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
+ ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
continue;
memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index a9faa75..26bd4e5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -187,7 +187,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
/* SI + AMDGPU + CE = GPU hang */
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
- sscreen->b.chip_class != SI) {
+ sscreen->b.chip_class != SI &&
+ /* These can't use CE due to a power gating bug in the kernel. */
+ sscreen->b.family != CHIP_CARRIZO &&
+ sscreen->b.family != CHIP_STONEY) {
sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
if (!sctx->ce_ib)
goto fail;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0ee760f..60c2401 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2577,10 +2577,18 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
lp_build_const_int32(gallivm,
tess_outer_index * 4), "");
- for (i = 0; i < outer_comps; i++)
- out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
- for (i = 0; i < inner_comps; i++)
- out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+ if (shader->key.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+ /* For isolines, the hardware expects tess factors in the
+ * reverse order from what GLSL / TGSI specify.
+ */
+ out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer);
+ out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer);
+ } else {
+ for (i = 0; i < outer_comps; i++)
+ out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ for (i = 0; i < inner_comps; i++)
+ out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+ }
/* Convert the outputs to vectors for stores. */
vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
@@ -3301,6 +3309,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
* point in the program by emitting empty inline assembly that is marked as
* having side effects.
*/
+#if 0 /* unused currently */
static void emit_optimization_barrier(struct si_shader_context *ctx)
{
LLVMBuilderRef builder = ctx->gallivm.builder;
@@ -3308,13 +3317,19 @@ static void emit_optimization_barrier(struct si_shader_context *ctx)
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
LLVMBuildCall(builder, inlineasm, NULL, 0, "");
}
+#endif
+
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT 0xf7f
+#define LGKM_CNT 0x07f
+#define VM_CNT 0xf70
-static void emit_waitcnt(struct si_shader_context *ctx)
+static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef args[1] = {
- lp_build_const_int32(gallivm, 0xf70)
+ lp_build_const_int32(gallivm, simm16)
};
lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt",
ctx->voidt, args, 1, 0);
@@ -3326,8 +3341,23 @@ static void membar_emit(
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
+ unsigned flags = LLVMConstIntGetZExtValue(src0);
+ unsigned waitcnt = NOOP_WAITCNT;
- emit_waitcnt(ctx);
+ if (flags & TGSI_MEMBAR_THREAD_GROUP)
+ waitcnt &= VM_CNT & LGKM_CNT;
+
+ if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
+ TGSI_MEMBAR_SHADER_BUFFER |
+ TGSI_MEMBAR_SHADER_IMAGE))
+ waitcnt &= VM_CNT;
+
+ if (flags & TGSI_MEMBAR_SHARED)
+ waitcnt &= LGKM_CNT;
+
+ if (waitcnt != NOOP_WAITCNT)
+ emit_waitcnt(ctx, waitcnt);
}
static LLVMValueRef
@@ -3481,7 +3511,8 @@ static void image_append_args(
struct si_shader_context *ctx,
struct lp_build_emit_data * emit_data,
unsigned target,
- bool atomic)
+ bool atomic,
+ bool force_glc)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3489,6 +3520,7 @@ static void image_append_args(
LLVMValueRef r128 = i1false;
LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
LLVMValueRef glc =
+ force_glc ||
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
i1true : i1false;
LLVMValueRef slc = i1false;
@@ -3543,7 +3575,8 @@ static void buffer_append_args(
LLVMValueRef rsrc,
LLVMValueRef index,
LLVMValueRef offset,
- bool atomic)
+ bool atomic,
+ bool force_glc)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3554,6 +3587,7 @@ static void buffer_append_args(
emit_data->args[emit_data->arg_count++] = offset; /* voffset */
if (!atomic) {
emit_data->args[emit_data->arg_count++] =
+ force_glc ||
inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
i1true : i1false; /* glc */
}
@@ -3583,7 +3617,7 @@ static void load_fetch_args(
offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
- offset, false);
+ offset, false, false);
} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
LLVMValueRef coords;
@@ -3593,14 +3627,14 @@ static void load_fetch_args(
if (target == TGSI_TEXTURE_BUFFER) {
rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
- bld_base->uint_bld.zero, false);
+ bld_base->uint_bld.zero, false, false);
} else {
emit_data->args[0] = coords;
emit_data->args[1] = rsrc;
emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
emit_data->arg_count = 3;
- image_append_args(ctx, emit_data, target, false);
+ image_append_args(ctx, emit_data, target, false, false);
}
}
}
@@ -3727,7 +3761,7 @@ static void load_emit(
}
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- emit_waitcnt(ctx);
+ emit_waitcnt(ctx, VM_CNT);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
load_emit_buffer(ctx, emit_data);
@@ -3790,11 +3824,19 @@ static void store_fetch_args(
offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
- offset, false);
+ offset, false, false);
} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
+ /* 8bit/16bit TC L1 write corruption bug on SI.
+ * All store opcodes not aligned to a dword are affected.
+ *
+ * The only way to get unaligned stores in radeonsi is through
+ * shader images.
+ */
+ bool force_glc = ctx->screen->b.chip_class == SI;
+
coords = image_fetch_coords(bld_base, inst, 0);
if (target == TGSI_TEXTURE_BUFFER) {
@@ -3802,14 +3844,14 @@ static void store_fetch_args(
rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
- bld_base->uint_bld.zero, false);
+ bld_base->uint_bld.zero, false, force_glc);
} else {
emit_data->args[1] = coords;
image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]);
emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
emit_data->arg_count = 4;
- image_append_args(ctx, emit_data, target, false);
+ image_append_args(ctx, emit_data, target, false, force_glc);
}
}
}
@@ -3929,7 +3971,7 @@ static void store_emit(
}
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- emit_waitcnt(ctx);
+ emit_waitcnt(ctx, VM_CNT);
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
store_emit_buffer(ctx, emit_data);
@@ -3993,7 +4035,7 @@ static void atomic_fetch_args(
offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
- offset, true);
+ offset, true, false);
} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
@@ -4005,12 +4047,12 @@ static void atomic_fetch_args(
if (target == TGSI_TEXTURE_BUFFER) {
rsrc = extract_rsrc_top_half(ctx, rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
- bld_base->uint_bld.zero, true);
+ bld_base->uint_bld.zero, true, false);
} else {
emit_data->args[emit_data->arg_count++] = coords;
emit_data->args[emit_data->arg_count++] = rsrc;
- image_append_args(ctx, emit_data, target, true);
+ image_append_args(ctx, emit_data, target, true, false);
}
}
}
@@ -5247,6 +5289,7 @@ static void si_llvm_emit_vertex(
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
SI_PARAM_GS2VS_OFFSET);
LLVMValueRef gs_next_vertex;
@@ -5264,19 +5307,28 @@ static void si_llvm_emit_vertex(
"");
/* If this thread has already emitted the declared maximum number of
- * vertices, kill it: excessive vertex emissions are not supposed to
- * have any effect, and GS threads have no externally observable
- * effects other than emitting vertices.
+ * vertices, skip the write: excessive vertex emissions are not
+ * supposed to have any effect.
+ *
+ * If the shader has no writes to memory, kill it instead. This skips
+ * further memory loads and may allow LLVM to skip to the end
+ * altogether.
*/
- can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
+ can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULT, gs_next_vertex,
lp_build_const_int32(gallivm,
shader->selector->gs_max_out_vertices), "");
- kill = lp_build_select(&bld_base->base, can_emit,
- lp_build_const_float(gallivm, 1.0f),
- lp_build_const_float(gallivm, -1.0f));
- lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
- ctx->voidt, &kill, 1, 0);
+ bool use_kill = !info->writes_memory;
+ if (use_kill) {
+ kill = lp_build_select(&bld_base->base, can_emit,
+ lp_build_const_float(gallivm, 1.0f),
+ lp_build_const_float(gallivm, -1.0f));
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ ctx->voidt, &kill, 1, 0);
+ } else {
+ lp_build_if(&if_state, gallivm, can_emit);
+ }
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
@@ -5302,6 +5354,7 @@ static void si_llvm_emit_vertex(
1, 0, 1, 1, 0);
}
}
+
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
lp_build_const_int32(gallivm, 1));
@@ -5312,6 +5365,9 @@ static void si_llvm_emit_vertex(
args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID);
lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
ctx->voidt, args, 2, 0);
+
+ if (!use_kill)
+ lp_build_endif(&if_state);
}
/* Cut one primitive from the geometry shader */
@@ -5344,7 +5400,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
* always fits into a single wave.
*/
if (ctx->type == PIPE_SHADER_TESS_CTRL) {
- emit_optimization_barrier(ctx);
+ emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
return;
}
@@ -5481,6 +5537,23 @@ static void declare_tess_lds(struct si_shader_context *ctx)
"tess_lds");
}
+static unsigned si_get_max_workgroup_size(struct si_shader *shader)
+{
+ const unsigned *properties = shader->selector->info.properties;
+ unsigned max_work_group_size =
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+
+ if (!max_work_group_size) {
+ /* This is a variable group size compute shader,
+ * compile it for the maximum possible group size.
+ */
+ max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+ }
+ return max_work_group_size;
+}
+
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
@@ -5706,22 +5779,9 @@ static void create_function(struct si_shader_context *ctx)
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_POS_FIXED_PT_ENA(1));
} else if (ctx->type == PIPE_SHADER_COMPUTE) {
- const unsigned *properties = shader->selector->info.properties;
- unsigned max_work_group_size =
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
-
- if (!max_work_group_size) {
- /* This is a variable group size compute shader,
- * compile it for the maximum possible group size.
- */
- max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
- }
-
si_llvm_add_attribute(ctx->main_fn,
"amdgpu-max-work-group-size",
- max_work_group_size);
+ si_get_max_workgroup_size(shader));
}
shader->info.num_input_sgprs = 0;
@@ -6643,20 +6703,11 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
* LLVM 3.9svn has this bug.
*/
if (sel->type == PIPE_SHADER_COMPUTE) {
- unsigned *props = sel->info.properties;
unsigned wave_size = 64;
unsigned max_vgprs = 256;
unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
unsigned max_sgprs_per_wave = 128;
- unsigned max_block_threads;
-
- if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
- max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
- else
- max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-
+ unsigned max_block_threads = si_get_max_workgroup_size(shader);
unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
@@ -7746,11 +7797,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size)
+{
+ /* SPI barrier management bug:
+ * Make sure we have at least 4k of LDS in use to avoid the bug.
+ * It applies to workgroup sizes of more than one wavefront.
+ */
+ if (sscreen->b.family == CHIP_BONAIRE ||
+ sscreen->b.family == CHIP_KABINI ||
+ sscreen->b.family == CHIP_MULLINS)
+ *lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+ struct si_shader *shader)
{
unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+ if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+ si_get_max_workgroup_size(shader) > 64) {
+ si_multiwave_lds_size_workaround(sscreen,
+ &shader->config.lds_size);
+ }
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
@@ -7846,7 +7917,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
- si_fix_num_sgprs(shader);
+ si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,
stderr);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b07210c..10bafca 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -482,6 +482,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
struct si_shader_config *config,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 85747eb..9e6e3d2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -453,8 +453,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
/* Only set dual source blending for MRT0 to avoid a hang. */
- if (i >= 1 && blend->dual_src_blend)
+ if (i >= 1 && blend->dual_src_blend) {
+ /* Vulkan does this for dual source blending. */
+ if (i == 1)
+ blend_cntl |= S_028780_ENABLE(1);
+
+ si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
continue;
+ }
/* Only addition and subtraction equations are supported with
* dual source blending.
@@ -463,16 +469,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
(eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
assert(!"Unsupported equation for dual source blending");
+ si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
continue;
}
- if (!state->rt[j].colormask)
- continue;
-
/* cb_render_state will disable unused ones */
blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
- if (!state->rt[j].blend_enable) {
+ if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
continue;
}
@@ -553,6 +557,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
}
if (sctx->b.family == CHIP_STONEY) {
+ /* Disable RB+ blend optimizations for dual source blending.
+ * Vulkan does this.
+ */
+ if (blend->dual_src_blend) {
+ for (int i = 0; i < 8; i++) {
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+ }
+ }
+
for (int i = 0; i < 8; i++)
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
sx_mrt_blend_opt[i]);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d18137b..6bbe36d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -154,6 +154,12 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
*/
*num_patches = MIN2(*num_patches, 40);
+ /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (sctx->b.chip_class == SI) {
+ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ *num_patches = MIN2(*num_patches, one_wave);
+ }
+
output_patch0_offset = input_patch_size * *num_patches;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
@@ -162,11 +168,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+ lds_size = align(lds_size, 512) / 512;
} else {
assert(lds_size <= 32768);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+ lds_size = align(lds_size, 256) / 256;
}
+ si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
if (sctx->last_ls == ls->current &&
sctx->last_tcs == tcs &&
@@ -284,10 +292,18 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (sctx->screen->has_distributed_tess) {
- if (sctx->gs_shader.cso)
+ if (sctx->gs_shader.cso) {
partial_es_wave = true;
- else
+
+ /* GPU hang workaround. */
+ if (sctx->b.family == CHIP_TONGA ||
+ sctx->b.family == CHIP_FIJI ||
+ sctx->b.family == CHIP_POLARIS10 ||
+ sctx->b.family == CHIP_POLARIS11)
+ partial_vs_wave = true;
+ } else {
partial_vs_wave = true;
+ }
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 137a5d1..0bb60cb 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1777,10 +1777,15 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
sctx->esgs_ring, 0, sctx->esgs_ring->width0,
false, false, 0, 0, 0);
}
- if (sctx->gsvs_ring)
+ if (sctx->gsvs_ring) {
si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
false, false, 0, 0, 0);
+
+ /* Also update SI_GS_RING_GSVSi descriptors. */
+ sctx->last_gsvs_itemsize = 0;
+ }
+
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 05e2021..15e8984 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1865,22 +1865,29 @@ ntq_emit_if(struct vc4_compile *c, nir_if *if_stmt)
static void
ntq_emit_jump(struct vc4_compile *c, nir_jump_instr *jump)
{
+ struct qblock *jump_block;
switch (jump->type) {
case nir_jump_break:
- qir_SF(c, c->execute);
- qir_MOV_cond(c, QPU_COND_ZS, c->execute,
- qir_uniform_ui(c, c->loop_break_block->index));
+ jump_block = c->loop_break_block;
break;
-
case nir_jump_continue:
- qir_SF(c, c->execute);
- qir_MOV_cond(c, QPU_COND_ZS, c->execute,
- qir_uniform_ui(c, c->loop_cont_block->index));
+ jump_block = c->loop_cont_block;
break;
-
- case nir_jump_return:
- unreachable("All returns shouold be lowered\n");
+ default:
+ unreachable("Unsupported jump type\n");
}
+
+ qir_SF(c, c->execute);
+ qir_MOV_cond(c, QPU_COND_ZS, c->execute,
+ qir_uniform_ui(c, jump_block->index));
+
+ /* Jump to the destination block if everyone has taken the jump. */
+ qir_SF(c, qir_SUB(c, c->execute, qir_uniform_ui(c, jump_block->index)));
+ qir_BRANCH(c, QPU_COND_BRANCH_ALL_ZS);
+ struct qblock *new_block = qir_new_block(c);
+ qir_link_blocks(c->cur_block, jump_block);
+ qir_link_blocks(c->cur_block, new_block);
+ qir_set_emit_block(c, new_block);
}
static void
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 0dfce3f..5d2bc96 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3194,7 +3194,7 @@
<value name="Per Process Graphics Address" value="0"/>
<value name="Global Graphics Address" value="1"/>
</field>
- <field name="Register Poll Mode" start="16" end="16" type="uint" default="1"/>
+ <field name="Register Poll Mode" start="16" end="16" type="bool"/>
<field name="Wait Mode" start="15" end="15" type="uint">
<value name="Polling Mode" value="1"/>
<value name="Signal Mode" value="0"/>
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 204c871..cfa27e3 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -246,10 +246,12 @@ anv_ptr_free_list_push(void **list, void *elem)
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
-void
+VkResult
anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device, uint32_t block_size)
{
+ VkResult result;
+
assert(util_is_power_of_two(block_size));
pool->device = device;
@@ -260,17 +262,23 @@ anv_block_pool_init(struct anv_block_pool *pool,
pool->fd = memfd_create("block pool", MFD_CLOEXEC);
if (pool->fd == -1)
- return;
+ return vk_error(VK_ERROR_INITIALIZATION_FAILED);
/* Just make it 2GB up-front. The Linux kernel won't actually back it
* with pages until we either map and fault on one of them or we use
* userptr and send a chunk of it off to the GPU.
*/
- if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1)
- return;
+ if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
+ }
- u_vector_init(&pool->mmap_cleanups,
- round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);
+ if (!u_vector_init(&pool->mmap_cleanups,
+ round_to_power_of_two(sizeof(struct anv_mmap_cleanup)),
+ 128)) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
+ }
pool->state.next = 0;
pool->state.end = 0;
@@ -279,6 +287,13 @@ anv_block_pool_init(struct anv_block_pool *pool,
/* Immediately grow the pool so we'll have a backing bo. */
pool->state.end = anv_block_pool_grow(pool, &pool->state);
+
+ return VK_SUCCESS;
+
+ fail_fd:
+ close(pool->fd);
+
+ return result;
}
void
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 17a1c8e..94c3f03 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -498,6 +498,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
struct surface_state_free_list_entry *entry =
set->buffer_views[b].surface_state.map;
entry->next = pool->surface_state_free_list;
+ entry->offset = set->buffer_views[b].surface_state.offset;
pool->surface_state_free_list = entry;
}
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 125df22..5333856 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -24,6 +24,7 @@
#include <assert.h>
#include <stdbool.h>
#include <string.h>
+#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
@@ -162,8 +163,6 @@ anv_physical_device_init(struct anv_physical_device *device,
device->info.max_cs_threads = max_cs_threads;
}
- close(fd);
-
brw_process_intel_debug_variable();
device->compiler = brw_compiler_create(NULL, &device->info);
@@ -175,12 +174,15 @@ anv_physical_device_init(struct anv_physical_device *device,
device->compiler->shader_perf_log = compiler_perf_log;
result = anv_init_wsi(device);
- if (result != VK_SUCCESS)
- goto fail;
+ if (result != VK_SUCCESS) {
+ ralloc_free(device->compiler);
+ goto fail;
+ }
/* XXX: Actually detect bit6 swizzling */
isl_device_init(&device->isl_dev, &device->info, swizzled);
+ close(fd);
return VK_SUCCESS;
fail:
@@ -527,7 +529,7 @@ void anv_GetPhysicalDeviceProperties(
.maxGeometryTotalOutputComponents = 1024,
.maxFragmentInputComponents = 128,
.maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 2,
+ .maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 8,
.maxComputeSharedMemorySize = 32768,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
@@ -967,10 +969,10 @@ void anv_DestroyDevice(
{
ANV_FROM_HANDLE(anv_device, device, _device);
- anv_queue_finish(&device->queue);
-
anv_device_finish_blorp(device);
+ anv_queue_finish(&device->queue);
+
#ifdef HAVE_VALGRIND
/* We only need to free these to prevent valgrind errors. The backing
* BO will go away in a couple of lines so we don't actually leak.
@@ -978,22 +980,27 @@ void anv_DestroyDevice(
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
#endif
+ anv_scratch_pool_finish(device, &device->scratch_pool);
+
anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
anv_gem_close(device, device->workaround_bo.gem_handle);
- anv_bo_pool_finish(&device->batch_bo_pool);
- anv_state_pool_finish(&device->dynamic_state_pool);
- anv_block_pool_finish(&device->dynamic_state_block_pool);
- anv_state_pool_finish(&device->instruction_state_pool);
- anv_block_pool_finish(&device->instruction_block_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
- anv_scratch_pool_finish(device, &device->scratch_pool);
+ anv_state_pool_finish(&device->instruction_state_pool);
+ anv_block_pool_finish(&device->instruction_block_pool);
+ anv_state_pool_finish(&device->dynamic_state_pool);
+ anv_block_pool_finish(&device->dynamic_state_block_pool);
- close(device->fd);
+ anv_bo_pool_finish(&device->batch_bo_pool);
+ pthread_cond_destroy(&device->queue_submit);
pthread_mutex_destroy(&device->mutex);
+ anv_gem_destroy_context(device, device->context_id);
+
+ close(device->fd);
+
vk_free(&device->alloc, device);
}
@@ -1236,6 +1243,9 @@ VkResult anv_AllocateMemory(
mem->type_index = pAllocateInfo->memoryTypeIndex;
+ mem->map = NULL;
+ mem->map_size = 0;
+
*pMem = anv_device_memory_to_handle(mem);
return VK_SUCCESS;
@@ -1257,6 +1267,9 @@ void anv_FreeMemory(
if (mem == NULL)
return;
+ if (mem->map)
+ anv_UnmapMemory(_device, _mem);
+
if (mem->bo.map)
anv_gem_munmap(mem->bo.map, mem->bo.size);
@@ -1303,8 +1316,12 @@ VkResult anv_MapMemory(
/* Let's map whole pages */
map_size = align_u64(map_size, 4096);
- mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
- map_offset, map_size, gem_flags);
+ void *map = anv_gem_mmap(device, mem->bo.gem_handle,
+ map_offset, map_size, gem_flags);
+ if (map == MAP_FAILED)
+ return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+
+ mem->map = map;
mem->map_size = map_size;
*ppData = mem->map + (offset - map_offset);
@@ -1322,6 +1339,9 @@ void anv_UnmapMemory(
return;
anv_gem_munmap(mem->map, mem->map_size);
+
+ mem->map = NULL;
+ mem->map_size = 0;
}
static void
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index e654689..0dde6d9 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -88,10 +88,8 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
- if (ret != 0) {
- /* FIXME: Is NULL the right error return? Cf MAP_INVALID */
- return NULL;
- }
+ if (ret != 0)
+ return MAP_FAILED;
VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
return (void *)(uintptr_t) gem_mmap.addr_ptr;
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 4a4d87e..10491f4 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -194,8 +194,8 @@ make_surface(const struct anv_device *dev,
anv_finishme("Test gen8 multisampled HiZ");
} else {
isl_surf_get_hiz_surf(&dev->isl_dev, &image->depth_surface.isl,
- &image->hiz_surface.isl);
- add_surface(image, &image->hiz_surface);
+ &image->aux_surface.isl);
+ add_surface(image, &image->aux_surface);
}
}
@@ -306,16 +306,16 @@ VkResult anv_BindImageMemory(
/* The offset and size must be a multiple of 4K or else the
* anv_gem_mmap call below will return NULL.
*/
- assert((image->offset + image->hiz_surface.offset) % 4096 == 0);
- assert(image->hiz_surface.isl.size % 4096 == 0);
+ assert((image->offset + image->aux_surface.offset) % 4096 == 0);
+ assert(image->aux_surface.isl.size % 4096 == 0);
/* HiZ surfaces need to have their memory cleared to 0 before they
* can be used. If we let it have garbage data, it can cause GPU
* hangs on some hardware.
*/
void *map = anv_gem_mmap(device, image->bo->gem_handle,
- image->offset + image->hiz_surface.offset,
- image->hiz_surface.isl.size,
+ image->offset + image->aux_surface.offset,
+ image->aux_surface.isl.size,
device->info.has_llc ? 0 : I915_MMAP_WC);
/* If anv_gem_mmap returns NULL, it's likely that the kernel was
@@ -324,9 +324,9 @@ VkResult anv_BindImageMemory(
if (map == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- memset(map, 0, image->hiz_surface.isl.size);
+ memset(map, 0, image->aux_surface.isl.size);
- anv_gem_munmap(map, image->hiz_surface.isl.size);
+ anv_gem_munmap(map, image->aux_surface.isl.size);
}
return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 06cdc0a..9c87105 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -416,8 +416,8 @@ anv_state_clflush(struct anv_state state)
anv_clflush_range(state.map, state.alloc_size);
}
-void anv_block_pool_init(struct anv_block_pool *pool,
- struct anv_device *device, uint32_t block_size);
+VkResult anv_block_pool_init(struct anv_block_pool *pool,
+ struct anv_device *device, uint32_t block_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
@@ -1526,10 +1526,11 @@ struct anv_image {
struct {
struct anv_surface depth_surface;
- struct anv_surface hiz_surface;
struct anv_surface stencil_surface;
};
};
+
+ struct anv_surface aux_surface;
};
static inline uint32_t
@@ -1593,11 +1594,11 @@ anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
static inline bool
anv_image_has_hiz(const struct anv_image *image)
{
- /* We must check the aspect because anv_image::hiz_surface belongs to
- * a union.
+ /* We must check the aspect because anv_image::aux_surface may be used for
+ * any type of auxiliary surface, not just HiZ.
*/
return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- image->hiz_surface.isl.size > 0;
+ image->aux_surface.isl.size > 0;
}
struct anv_buffer_view {
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index f1b5387..4e92cca 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1356,22 +1356,13 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
assert(result == VK_SUCCESS);
}
+
result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
assert(result == VK_SUCCESS);
-
- struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
-
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
- if (push_state.alloc_size) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
- curbe.CURBETotalDataLength = push_state.alloc_size;
- curbe.CURBEDataStartAddress = push_state.offset;
- }
- }
-
const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
struct anv_state state =
@@ -1441,6 +1432,18 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
+ if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
+ struct anv_state push_state =
+ anv_cmd_buffer_cs_push_constants(cmd_buffer);
+
+ if (push_state.alloc_size) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
+ curbe.CURBETotalDataLength = push_state.alloc_size;
+ curbe.CURBEDataStartAddress = push_state.offset;
+ }
+ }
+ }
+
cmd_buffer->state.compute_dirty = 0;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1796,10 +1799,10 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
if (has_hiz) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hdb) {
hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS);
- hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1;
+ hdb.SurfacePitch = image->aux_surface.isl.row_pitch - 1;
hdb.SurfaceBaseAddress = (struct anv_address) {
.bo = image->bo,
- .offset = image->offset + image->hiz_surface.offset,
+ .offset = image->offset + image->aux_surface.offset,
};
#if GEN_GEN >= 8
/* From the SKL PRM Vol2a:
@@ -1809,11 +1812,14 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
* - SURFTYPE_1D: distance in pixels between array slices
* - SURFTYPE_2D/CUBE: distance in rows between array slices
* - SURFTYPE_3D: distance in rows between R - slices
+ *
+ * Unfortunately, the docs aren't 100% accurate here. They fail to
+ * mention that the 1-D rule only applies to linear 1-D images.
+ * Since depth and HiZ buffers are always tiled, they are treated as
+ * 2-D images. Prior to Sky Lake, this field is always in rows.
*/
hdb.SurfaceQPitch =
- image->hiz_surface.isl.dim == ISL_SURF_DIM_1D ?
- isl_surf_get_array_pitch_el(&image->hiz_surface.isl) >> 2 :
- isl_surf_get_array_pitch_el_rows(&image->hiz_surface.isl) >> 2;
+ isl_surf_get_array_pitch_el_rows(&image->aux_surface.isl) >> 2;
#endif
}
} else {
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index a192fc0..4b00977 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -106,6 +106,7 @@ TEST_LIBS = \
TESTS = \
test_fs_cmod_propagation \
+ test_fs_copy_propagation \
test_fs_saturate_propagation \
test_eu_compact \
test_vf_float_conversions \
@@ -121,6 +122,12 @@ test_fs_cmod_propagation_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(TEST_LIBS)
+test_fs_copy_propagation_SOURCES = \
+ test_fs_copy_propagation.cpp
+test_fs_copy_propagation_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(TEST_LIBS)
+
test_fs_saturate_propagation_SOURCES = \
test_fs_saturate_propagation.cpp
test_fs_saturate_propagation_LDADD = \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index afb1057..c4cbf84 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5692,7 +5692,7 @@ fs_visitor::optimize()
OPT(opt_algebraic);
OPT(opt_cse);
- OPT(opt_copy_propagate);
+ OPT(opt_copy_propagation);
OPT(opt_predicated_break, this);
OPT(opt_cmod_propagation);
OPT(dead_code_eliminate);
@@ -5716,7 +5716,7 @@ fs_visitor::optimize()
}
if (OPT(lower_d2x)) {
- OPT(opt_copy_propagate);
+ OPT(opt_copy_propagation);
OPT(dead_code_eliminate);
}
@@ -5728,12 +5728,12 @@ fs_visitor::optimize()
OPT(lower_logical_sends);
if (progress) {
- OPT(opt_copy_propagate);
+ OPT(opt_copy_propagation);
/* Only run after logical send lowering because it's easier to implement
* in terms of physical sends.
*/
if (OPT(opt_zero_samples))
- OPT(opt_copy_propagate);
+ OPT(opt_copy_propagation);
/* Run after logical send lowering to give it a chance to CSE the
* LOAD_PAYLOAD instructions created to construct the payloads of
* e.g. texturing messages in cases where it wasn't possible to CSE the
@@ -5762,7 +5762,7 @@ fs_visitor::optimize()
if (devinfo->gen <= 5 && OPT(lower_minmax)) {
OPT(opt_cmod_propagation);
OPT(opt_cse);
- OPT(opt_copy_propagate);
+ OPT(opt_copy_propagation);
OPT(dead_code_eliminate);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index da01174..3a53768 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -133,11 +133,11 @@ public:
bool opt_redundant_discard_jumps();
bool opt_cse();
bool opt_cse_local(bblock_t *block);
- bool opt_copy_propagate();
+ bool opt_copy_propagation();
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
- bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
- exec_list *acp);
+ bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
+ exec_list *acp);
bool opt_drop_redundant_mov_to_flags();
bool opt_register_renaming();
bool register_coalesce();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index e4e6816..da02fb1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -129,7 +129,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
acp[next_acp] = entry;
- /* opt_copy_propagate_local populates out_acp with copies created
+ /* opt_copy_propagation_local populates out_acp with copies created
* in a block which are still live at the end of the block. This
* is exactly what we want in the COPY set.
*/
@@ -431,7 +431,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->saturate) {
switch(inst->opcode) {
case BRW_OPCODE_SEL:
- if (inst->src[1].file != IMM ||
+ if ((inst->conditional_mod != BRW_CONDITIONAL_GE &&
+ inst->conditional_mod != BRW_CONDITIONAL_L) ||
+ inst->src[1].file != IMM ||
inst->src[1].f < 0.0 ||
inst->src[1].f > 1.0) {
return false;
@@ -735,8 +737,8 @@ can_propagate_from(fs_inst *inst)
* list.
*/
bool
-fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
- exec_list *acp)
+fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
+ exec_list *acp)
{
bool progress = false;
@@ -819,7 +821,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
}
bool
-fs_visitor::opt_copy_propagate()
+fs_visitor::opt_copy_propagation()
{
bool progress = false;
void *copy_prop_ctx = ralloc_context(NULL);
@@ -832,8 +834,8 @@ fs_visitor::opt_copy_propagate()
* the set of copies available at the end of the block.
*/
foreach_block (block, cfg) {
- progress = opt_copy_propagate_local(copy_prop_ctx, block,
- out_acp[block->num]) || progress;
+ progress = opt_copy_propagation_local(copy_prop_ctx, block,
+ out_acp[block->num]) || progress;
}
/* Do dataflow analysis for those available copies. */
@@ -852,7 +854,8 @@ fs_visitor::opt_copy_propagate()
}
}
- progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress;
+ progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) ||
+ progress;
}
for (int i = 0; i < cfg->num_blocks; i++)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b0ee289..ac200d2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -780,7 +780,13 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (compiler->devinfo->gen >= 8)
output_size_bytes += 32;
- assert(output_size_bytes >= 1);
+ /* Shaders can technically set max_vertices = 0, at which point we
+ * may have a URB size of 0 bytes. Nothing good can come from that,
+ * so enforce a minimum size.
+ */
+ if (output_size_bytes == 0)
+ output_size_bytes = 1;
+
unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index aba203a..78c7a11 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2159,6 +2159,8 @@ intel_miptree_make_shareable(struct brw_context *brw,
intel_miptree_release(&mt->mcs_mt);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
}
+
+ mt->disable_aux_buffers = true;
}
diff --git a/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
new file mode 100644
index 0000000..ed2f1e0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class copy_propagation_test : public ::testing::Test {
+ virtual void SetUp();
+
+public:
+ struct brw_compiler *compiler;
+ struct gen_device_info *devinfo;
+ struct gl_context *ctx;
+ struct brw_wm_prog_data *prog_data;
+ struct gl_shader_program *shader_prog;
+ fs_visitor *v;
+};
+
+class copy_propagation_fs_visitor : public fs_visitor
+{
+public:
+ copy_propagation_fs_visitor(struct brw_compiler *compiler,
+ struct brw_wm_prog_data *prog_data,
+ nir_shader *shader)
+ : fs_visitor(compiler, NULL, NULL, NULL,
+ &prog_data->base, (struct gl_program *) NULL,
+ shader, 8, -1) {}
+};
+
+
+void copy_propagation_test::SetUp()
+{
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
+
+ prog_data = ralloc(NULL, struct brw_wm_prog_data);
+ nir_shader *shader =
+ nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
+
+ v = new copy_propagation_fs_visitor(compiler, prog_data, shader);
+
+ devinfo->gen = 4;
+}
+
+static fs_inst *
+instruction(bblock_t *block, int num)
+{
+ fs_inst *inst = (fs_inst *)block->start();
+ for (int i = 0; i < num; i++) {
+ inst = (fs_inst *)inst->next;
+ }
+ return inst;
+}
+
+static bool
+copy_propagation(fs_visitor *v)
+{
+ const bool print = getenv("TEST_DEBUG");
+
+ if (print) {
+ fprintf(stderr, "= Before =\n");
+ v->cfg->dump(v);
+ }
+
+ bool ret = v->opt_copy_propagation();
+
+ if (print) {
+ fprintf(stderr, "\n= After =\n");
+ v->cfg->dump(v);
+ }
+
+ return ret;
+}
+
+TEST_F(copy_propagation_test, basic)
+{
+ const fs_builder &bld = v->bld;
+ fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
+ fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
+ fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
+ fs_reg vgrf3 = v->vgrf(glsl_type::float_type);
+ bld.MOV(vgrf0, vgrf2);
+ bld.ADD(vgrf1, vgrf0, vgrf3);
+
+ /* = Before =
+ *
+ * 0: mov(8) vgrf0 vgrf2
+ * 1: add(8) vgrf1 vgrf0 vgrf3
+ *
+ * = After =
+ * 0: mov(8) vgrf0 vgrf2
+ * 1: add(8) vgrf1 vgrf2 vgrf3
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(copy_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ fs_inst *mov = instruction(block0, 0);
+ EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
+ EXPECT_TRUE(mov->dst.equals(vgrf0));
+ EXPECT_TRUE(mov->src[0].equals(vgrf2));
+
+ fs_inst *add = instruction(block0, 1);
+ EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
+ EXPECT_TRUE(add->dst.equals(vgrf1));
+ EXPECT_TRUE(add->src[0].equals(vgrf2));
+ EXPECT_TRUE(add->src[1].equals(vgrf3));
+}
+
+TEST_F(copy_propagation_test, maxmax_sat_imm)
+{
+ const fs_builder &bld = v->bld;
+ fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
+ fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
+ fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
+
+ static const struct {
+ enum brw_conditional_mod conditional_mod;
+ float immediate;
+ bool expected_result;
+ } test[] = {
+ /* conditional mod, imm, expected_result */
+ { BRW_CONDITIONAL_GE , 0.1f, true },
+ { BRW_CONDITIONAL_L , 0.1f, true },
+ { BRW_CONDITIONAL_GE , 0.5f, true },
+ { BRW_CONDITIONAL_L , 0.5f, true },
+ { BRW_CONDITIONAL_GE , 0.9f, true },
+ { BRW_CONDITIONAL_L , 0.9f, true },
+ { BRW_CONDITIONAL_GE , -1.5f, false },
+ { BRW_CONDITIONAL_L , -1.5f, false },
+ { BRW_CONDITIONAL_GE , 1.5f, false },
+ { BRW_CONDITIONAL_L , 1.5f, false },
+
+ { BRW_CONDITIONAL_NONE, 0.5f, false },
+ { BRW_CONDITIONAL_Z , 0.5f, false },
+ { BRW_CONDITIONAL_NZ , 0.5f, false },
+ { BRW_CONDITIONAL_G , 0.5f, false },
+ { BRW_CONDITIONAL_LE , 0.5f, false },
+ { BRW_CONDITIONAL_R , 0.5f, false },
+ { BRW_CONDITIONAL_O , 0.5f, false },
+ { BRW_CONDITIONAL_U , 0.5f, false },
+ };
+
+ for (unsigned i = 0; i < sizeof(test) / sizeof(test[0]); i++) {
+ fs_inst *mov = set_saturate(true, bld.MOV(vgrf0, vgrf1));
+ fs_inst *sel = set_condmod(test[i].conditional_mod,
+ bld.SEL(vgrf2, vgrf0,
+ brw_imm_f(test[i].immediate)));
+
+ v->calculate_cfg();
+
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_EQ(test[i].expected_result, copy_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
+ EXPECT_TRUE(mov->saturate);
+ EXPECT_TRUE(mov->dst.equals(vgrf0));
+ EXPECT_TRUE(mov->src[0].equals(vgrf1));
+
+ EXPECT_EQ(BRW_OPCODE_SEL, sel->opcode);
+ EXPECT_EQ(test[i].conditional_mod, sel->conditional_mod);
+ EXPECT_EQ(test[i].expected_result, sel->saturate);
+ EXPECT_TRUE(sel->dst.equals(vgrf2));
+ if (test[i].expected_result) {
+ EXPECT_TRUE(sel->src[0].equals(vgrf1));
+ } else {
+ EXPECT_TRUE(sel->src[0].equals(vgrf0));
+ }
+ EXPECT_TRUE(sel->src[1].equals(brw_imm_f(test[i].immediate)));
+
+ delete v->cfg;
+ v->cfg = NULL;
+ }
+}
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index d3b4cab..071c16d 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
* buffer bound.
*/
if (_mesa_is_gles31(ctx) &&
- ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) {
+ ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
return GL_FALSE;
}
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 9204606..64c4ab5 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2850,6 +2850,7 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb,
dst_att->Type = src_att->Type;
dst_att->Complete = src_att->Complete;
dst_att->TextureLevel = src_att->TextureLevel;
+ dst_att->CubeMapFace = src_att->CubeMapFace;
dst_att->Zoffset = src_att->Zoffset;
dst_att->Layered = src_att->Layered;
}
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index 19aaf48..97fd4ce 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -67,9 +67,7 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
}
static struct gl_shader_program *
-lookup_linked_program(GLuint program,
- const char *caller,
- bool raise_link_error)
+lookup_linked_program(GLuint program, const char *caller)
{
GET_CURRENT_CONTEXT(ctx);
struct gl_shader_program *prog =
@@ -79,66 +77,13 @@ lookup_linked_program(GLuint program,
return NULL;
if (prog->LinkStatus == GL_FALSE) {
- if (raise_link_error)
- _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)",
- caller);
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)",
+ caller);
return NULL;
}
return prog;
}
-static GLenum
-stage_from_program_interface(GLenum programInterface)
-{
- switch(programInterface) {
- case GL_VERTEX_SUBROUTINE_UNIFORM:
- return MESA_SHADER_VERTEX;
- case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
- return MESA_SHADER_TESS_CTRL;
- case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- return MESA_SHADER_TESS_EVAL;
- case GL_GEOMETRY_SUBROUTINE_UNIFORM:
- return MESA_SHADER_GEOMETRY;
- case GL_FRAGMENT_SUBROUTINE_UNIFORM:
- return MESA_SHADER_FRAGMENT;
- case GL_COMPUTE_SUBROUTINE_UNIFORM:
- return MESA_SHADER_COMPUTE;
- default:
- unreachable("unexpected programInterface value");
- }
-}
-
-static struct gl_linked_shader *
-lookup_linked_shader(GLuint program,
- GLenum programInterface,
- const char *caller)
-{
- struct gl_shader_program *shLinkedProg =
- lookup_linked_program(program, caller, false);
- gl_shader_stage stage = stage_from_program_interface(programInterface);
-
- if (!shLinkedProg)
- return NULL;
-
- return shLinkedProg->_LinkedShaders[stage];
-}
-
-static bool
-is_subroutine_uniform_program_interface(GLenum programInterface)
-{
- switch(programInterface) {
- case GL_VERTEX_SUBROUTINE_UNIFORM:
- case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
- case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- case GL_GEOMETRY_SUBROUTINE_UNIFORM:
- case GL_FRAGMENT_SUBROUTINE_UNIFORM:
- case GL_COMPUTE_SUBROUTINE_UNIFORM:
- return true;
- default:
- return false;
- }
-}
-
void GLAPIENTRY
_mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
GLenum pname, GLint *params)
@@ -174,49 +119,9 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
/* Validate pname against interface. */
switch(pname) {
case GL_ACTIVE_RESOURCES:
- if (is_subroutine_uniform_program_interface(programInterface)) {
- /* ARB_program_interface_query doesn't explicitly says that those
- * uniforms would need a linked shader, or that should fail if it is
- * not the case, but Section 7.6 (Uniform Variables) of the OpenGL
- * 4.4 Core Profile says:
- *
- * "A uniform is considered an active uniform if the compiler and
- * linker determine that the uniform will actually be accessed
- * when the executable code is executed. In cases where the
- * compiler and linker cannot make a conclusive determination,
- * the uniform will be considered active."
- *
- * So in order to know the real number of active subroutine uniforms
- * we would need a linked shader .
- *
- * At the same time, Section 7.3 (Program Objects) of the OpenGL 4.4
- * Core Profile says:
- *
- * "The GL provides various commands allowing applications to
- * enumerate and query properties of active variables and in-
- * terface blocks for a specified program. If one of these
- * commands is called with a program for which LinkProgram
- * succeeded, the information recorded when the program was
- * linked is returned. If one of these commands is called with a
- * program for which LinkProgram failed, no error is generated
- * unless otherwise noted."
- * <skip>
- * "If one of these commands is called with a program for which
- * LinkProgram had never been called, no error is generated
- * unless otherwise noted, and the program object is considered
- * to have no active variables or interface blocks."
- *
- * So if the program is not linked we will return 0.
- */
- struct gl_linked_shader *sh =
- lookup_linked_shader(program, programInterface, "glGetProgramInterfaceiv");
-
- *params = sh ? sh->NumSubroutineUniforms : 0;
- } else {
- for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++)
- if (shProg->ProgramResourceList[i].Type == programInterface)
- (*params)++;
- }
+ for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++)
+ if (shProg->ProgramResourceList[i].Type == programInterface)
+ (*params)++;
break;
case GL_MAX_NAME_LENGTH:
if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
@@ -500,7 +405,7 @@ _mesa_GetProgramResourceLocation(GLuint program, GLenum programInterface,
}
struct gl_shader_program *shProg =
- lookup_linked_program(program, "glGetProgramResourceLocation", true);
+ lookup_linked_program(program, "glGetProgramResourceLocation");
if (!shProg || !name)
return -1;
@@ -556,7 +461,7 @@ _mesa_GetProgramResourceLocationIndex(GLuint program, GLenum programInterface,
}
struct gl_shader_program *shProg =
- lookup_linked_program(program, "glGetProgramResourceLocationIndex", true);
+ lookup_linked_program(program, "glGetProgramResourceLocationIndex");
if (!shProg || !name)
return -1;
diff --git a/src/vulkan/wsi/wsi_common_queue.h b/src/vulkan/wsi/wsi_common_queue.h
index 0e72c8d..6d489cb 100644
--- a/src/vulkan/wsi/wsi_common_queue.h
+++ b/src/vulkan/wsi/wsi_common_queue.h
@@ -65,6 +65,7 @@ wsi_queue_init(struct wsi_queue *queue, int length)
if (ret)
goto fail_cond;
+ pthread_condattr_destroy(&condattr);
return 0;
fail_cond: