summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_vue_map.c
diff options
context:
space:
mode:
author Kenneth Graunke <kenneth@whitecape.org> 2015-09-09 16:21:56 -0700
committer Kenneth Graunke <kenneth@whitecape.org> 2015-09-26 11:59:56 -0700
commit 99df02ca26f6127c8fa24d38a8a069ac6159356a (patch)
tree 20ab0015cac03ed940b4faa70a7f9f1ac32df5cc /src/mesa/drivers/dri/i965/brw_vue_map.c
parent 1e5180316cd62c8c8aa46399ab6a17ad9604d48f (diff)
download external_mesa3d-99df02ca26f6127c8fa24d38a8a069ac6159356a.tar.gz
external_mesa3d-99df02ca26f6127c8fa24d38a8a069ac6159356a.tar.bz2
external_mesa3d-99df02ca26f6127c8fa24d38a8a069ac6159356a.zip
i965: Don't re-layout varyings for separate shader programs.
Previously, our VUE map code always assigned slots to varyings sequentially, in one contiguous block. This was a bad fit for separate shaders - the GS input layout depended on the VS output layout, so if we swapped out vertex shaders, we might have to recompile the GS on the fly - which rather defeats the point of using separate shader objects. (Tessellation would suffer from this as well - we could have to recompile the HS, DS, and GS.) Instead, this patch makes the VUE map for separate shaders use a fixed layout, based on the input/output variable's location field. (This is either specified by layout(location = ...) or assigned by the linker.) Corresponding inputs/outputs will match up by location; if there's a mismatch, we're allowed to have undefined behavior. This may be less efficient - depending on what locations were chosen, we may have empty padding slots in the VUE. But applications presumably use small consecutive integers for locations, so it hopefully won't be much worse in practice. 3% of Dota 2 Reborn shaders are hurt, but only by 2 instructions. This seems like a small price to pay for avoiding recompiles. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vue_map.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vue_map.c | 51
1 files changed, 40 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 1ef52143cc..45662bd5af 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -59,10 +59,18 @@ assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
void
brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid)
+ GLbitfield64 slots_valid,
+ bool separate)
{
+ /* Keep using the packed/contiguous layout on old hardware - we only need
+ * the SSO layout when using geometry/tessellation shaders or 32 FS input
+ * varyings, which only exist on Gen >= 6. It's also a bit more efficient.
+ */
+ if (devinfo->gen < 6)
+ separate = false;
+
vue_map->slots_valid = slots_valid;
- int i;
+ vue_map->separate = separate;
/* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
* are stored in the first VUE slot (VARYING_SLOT_PSIZ).
@@ -77,7 +85,7 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
- for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+ for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
vue_map->varying_to_slot[i] = -1;
vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
}
@@ -131,21 +139,42 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
}
- /* The hardware doesn't care about the rest of the vertex outputs, so just
- * assign them contiguously. Don't reassign outputs that already have a
- * slot.
+ /* The hardware doesn't care about the rest of the vertex outputs, so we
+ * can assign them however we like. For normal programs, we simply assign
+ * them contiguously.
+ *
+ * For separate shader pipelines, we first assign built-in varyings
+ * contiguous slots. This works because ARB_separate_shader_objects
+ * requires that all shaders have matching built-in varying interface
+ * blocks. Next, we assign generic varyings based on their location
+ * (either explicit or linker assigned). This guarantees a fixed layout.
*
* We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
* since it's encoded as the clip distances by emit_clip_distances().
* However, it may be output by transform feedback, and we'd rather not
* recompute state when TF changes, so we just always include it.
*/
- for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
- if ((slots_valid & BITFIELD64_BIT(i)) &&
- vue_map->varying_to_slot[i] == -1) {
- assign_vue_slot(vue_map, i, slot++);
+ GLbitfield64 builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
+ while (builtins != 0) {
+ const int varying = ffsll(builtins) - 1;
+ if (vue_map->varying_to_slot[varying] == -1) {
+ assign_vue_slot(vue_map, varying, slot++);
+ }
+ builtins &= ~BITFIELD64_BIT(varying);
+ }
+
+ const int first_generic_slot = slot;
+ GLbitfield64 generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
+ while (generics != 0) {
+ const int varying = ffsll(generics) - 1;
+ if (separate) {
+ slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
+ assign_vue_slot(vue_map, varying, slot);
+ } else {
+ assign_vue_slot(vue_map, varying, slot++);
}
+ generics &= ~BITFIELD64_BIT(varying);
}
- vue_map->num_slots = slot;
+ vue_map->num_slots = separate ? slot + 1 : slot;
}