aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>2019-07-16 21:10:08 (GMT)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>2019-07-22 15:20:34 (GMT)
commit3174bc9972fa812989c2bbb4be8d0651024d84f2 (patch)
tree193d0d8dc13bede5d6ddc5c97d35ed0ab0352ef5
parentaa03159120f31a45c795bb1d15113d69c485c2bb (diff)
downloadexternal_mesa3d-3174bc9972fa812989c2bbb4be8d0651024d84f2.zip
external_mesa3d-3174bc9972fa812989c2bbb4be8d0651024d84f2.tar.gz
external_mesa3d-3174bc9972fa812989c2bbb4be8d0651024d84f2.tar.bz2
panfrost: Promote uniform registers late
Rather than creating either a load or a uniform register read with a fixed beginning offset, we always create a load and then promote to a uniform register later. This will allow us to promote in a register pressure aware manner. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
-rw-r--r--src/panfrost/midgard/compiler.h17
-rw-r--r--src/panfrost/midgard/helpers.h6
-rw-r--r--src/panfrost/midgard/meson.build1
-rw-r--r--src/panfrost/midgard/midgard_compile.c97
-rw-r--r--src/panfrost/midgard/midgard_schedule.c59
-rw-r--r--src/panfrost/midgard/mir_promote_uniforms.c76
6 files changed, 174 insertions, 82 deletions
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index bd7c11e..73ec4b5 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -345,6 +345,11 @@ mir_next_op(struct midgard_instruction *ins)
mir_foreach_block(ctx, v_block) \
mir_foreach_instr_in_block(v_block, v)
+#define mir_foreach_instr_global_safe(ctx, v) \
+ mir_foreach_block(ctx, v_block) \
+ mir_foreach_instr_in_block_safe(v_block, v)
+
+
static inline midgard_instruction *
mir_last_in_block(struct midgard_block *block)
@@ -440,6 +445,18 @@ bool mir_has_multiple_writes(compiler_context *ctx, int src);
void mir_create_pipeline_registers(compiler_context *ctx);
+void
+midgard_promote_uniforms(compiler_context *ctx, unsigned pressure);
+
+void
+emit_ubo_read(
+ compiler_context *ctx,
+ unsigned dest,
+ unsigned offset,
+ nir_src *indirect_offset,
+ unsigned index);
+
+
/* Final emission */
void emit_binary_bundle(
diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h
index ef854dc..13b868b 100644
--- a/src/panfrost/midgard/helpers.h
+++ b/src/panfrost/midgard/helpers.h
@@ -40,6 +40,12 @@
op == midgard_alu_op_imov \
)
+#define OP_IS_UBO_READ(op) ( \
+ op == midgard_op_ld_uniform_32 || \
+ op == midgard_op_ld_uniform_16 || \
+ op == midgard_op_ld_uniform_32i \
+ )
+
/* ALU control words are single bit fields with a lot of space */
#define ALU_ENAB_VEC_MUL (1 << 17)
diff --git a/src/panfrost/midgard/meson.build b/src/panfrost/midgard/meson.build
index cbe2600..b467f83 100644
--- a/src/panfrost/midgard/meson.build
+++ b/src/panfrost/midgard/meson.build
@@ -29,6 +29,7 @@ libpanfrost_midgard_files = files(
'midgard_ra_pipeline.c',
'midgard_liveness.c',
'midgard_ops.c',
+ 'mir_promote_uniforms.c',
'cppwrap.cpp',
'disassemble.c',
)
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 8148407..37436e1 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1157,7 +1157,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
* optimized) versions of UBO #0 */
-static void
+void
emit_ubo_read(
compiler_context *ctx,
unsigned dest,
@@ -1167,36 +1167,20 @@ emit_ubo_read(
{
/* TODO: half-floats */
- if (!indirect_offset && offset < ctx->uniform_cutoff && index == 0) {
- /* Fast path: For the first 16 uniforms, direct accesses are
- * 0-cycle, since they're just a register fetch in the usual
- * case. So, we alias the registers while we're still in
- * SSA-space */
-
- int reg_slot = 23 - offset;
- alias_ssa(ctx, dest, SSA_FIXED_REGISTER(reg_slot));
- } else {
- /* Otherwise, read from the 'special' UBO to access
- * higher-indexed uniforms, at a performance cost. More
- * generally, we're emitting a UBO read instruction. */
-
- midgard_instruction ins = m_ld_uniform_32(dest, offset);
-
- /* TODO: Don't split */
- ins.load_store.varying_parameters = (offset & 7) << 7;
- ins.load_store.address = offset >> 3;
+ midgard_instruction ins = m_ld_uniform_32(dest, offset);
- if (indirect_offset) {
- emit_indirect_offset(ctx, indirect_offset);
- ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
- } else {
- ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
- }
-
- /* TODO respect index */
+ /* TODO: Don't split */
+ ins.load_store.varying_parameters = (offset & 7) << 7;
+ ins.load_store.address = offset >> 3;
- emit_mir_instruction(ctx, ins);
+ if (indirect_offset) {
+ emit_indirect_offset(ctx, indirect_offset);
+ ins.load_store.unknown = 0x8700 | index; /* xxx: what is this? */
+ } else {
+ ins.load_store.unknown = 0x1E00 | index; /* xxx: what is this? */
}
+
+ emit_mir_instruction(ctx, ins);
}
static void
@@ -2228,57 +2212,6 @@ midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
return progress;
}
-/* The following passes reorder MIR instructions to enable better scheduling */
-
-static void
-midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
-{
- mir_foreach_instr_in_block_safe(block, ins) {
- if (ins->type != TAG_LOAD_STORE_4) continue;
-
- /* We've found a load/store op. Check if next is also load/store. */
- midgard_instruction *next_op = mir_next_op(ins);
- if (&next_op->link != &block->instructions) {
- if (next_op->type == TAG_LOAD_STORE_4) {
- /* If so, we're done since we're a pair */
- ins = mir_next_op(ins);
- continue;
- }
-
- /* Maximum search distance to pair, to avoid register pressure disasters */
- int search_distance = 8;
-
- /* Otherwise, we have an orphaned load/store -- search for another load */
- mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
- /* Terminate search if necessary */
- if (!(search_distance--)) break;
-
- if (c->type != TAG_LOAD_STORE_4) continue;
-
- /* Stores cannot be reordered, since they have
- * dependencies. For the same reason, indirect
- * loads cannot be reordered as their index is
- * loaded in r27.w */
-
- if (OP_IS_STORE(c->load_store.op)) continue;
-
- /* It appears the 0x800 bit is set whenever a
- * load is direct, unset when it is indirect.
- * Skip indirect loads. */
-
- if (!(c->load_store.unknown & 0x800)) continue;
-
- /* We found one! Move it up to pair and remove it from the old location */
-
- mir_insert_instruction_before(ins, *c);
- mir_remove_instruction(c);
-
- break;
- }
- }
- }
-}
-
/* If there are leftovers after the below pass, emit actual fmov
* instructions for the slow-but-correct path */
@@ -2358,8 +2291,6 @@ emit_block(compiler_context *ctx, nir_block *block)
/* Perform heavylifting for aliasing */
actualise_ssa_to_alias(ctx);
- midgard_pair_load_store(ctx, this_block);
-
/* Append fragment shader epilogue (value writeout) */
if (ctx->stage == MESA_SHADER_FRAGMENT) {
if (block == nir_impl_last_block(ctx->func->impl)) {
@@ -2564,7 +2495,9 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
compiler_context *ctx = &ictx;
- /* TODO: Decide this at runtime */
+ /* Start off with a safe cutoff, allowing usage of all 16 work
+ * registers. Later, we'll promote uniform reads to uniform registers
+ * if we determine it is beneficial to do so */
ctx->uniform_cutoff = 8;
/* Initialize at a global (not block) level hash tables */
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index ccc641c..97e06d7 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -524,6 +524,59 @@ schedule_block(compiler_context *ctx, midgard_block *block)
block->is_scheduled = true;
}
+/* The following passes reorder MIR instructions to enable better scheduling */
+
+static void
+midgard_pair_load_store(compiler_context *ctx, midgard_block *block)
+{
+ mir_foreach_instr_in_block_safe(block, ins) {
+ if (ins->type != TAG_LOAD_STORE_4) continue;
+
+ /* We've found a load/store op. Check if next is also load/store. */
+ midgard_instruction *next_op = mir_next_op(ins);
+ if (&next_op->link != &block->instructions) {
+ if (next_op->type == TAG_LOAD_STORE_4) {
+ /* If so, we're done since we're a pair */
+ ins = mir_next_op(ins);
+ continue;
+ }
+
+ /* Maximum search distance to pair, to avoid register pressure disasters */
+ int search_distance = 8;
+
+ /* Otherwise, we have an orphaned load/store -- search for another load */
+ mir_foreach_instr_in_block_from(block, c, mir_next_op(ins)) {
+ /* Terminate search if necessary */
+ if (!(search_distance--)) break;
+
+ if (c->type != TAG_LOAD_STORE_4) continue;
+
+ /* Stores cannot be reordered, since they have
+ * dependencies. For the same reason, indirect
+ * loads cannot be reordered as their index is
+ * loaded in r27.w */
+
+ if (OP_IS_STORE(c->load_store.op)) continue;
+
+ /* It appears the 0x800 bit is set whenever a
+ * load is direct, unset when it is indirect.
+ * Skip indirect loads. */
+
+ if (!(c->load_store.unknown & 0x800)) continue;
+
+ /* We found one! Move it up to pair and remove it from the old location */
+
+ mir_insert_instruction_before(ins, *c);
+ mir_remove_instruction(c);
+
+ break;
+ }
+ }
+ }
+}
+
+
+
void
schedule_program(compiler_context *ctx)
{
@@ -531,6 +584,12 @@ schedule_program(compiler_context *ctx)
bool spilled = false;
int iter_count = 10; /* max iterations */
+ midgard_promote_uniforms(ctx, 8);
+
+ mir_foreach_block(ctx, block) {
+ midgard_pair_load_store(ctx, block);
+ }
+
do {
/* We would like to run RA after scheduling, but spilling can
* complicate this */
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
new file mode 100644
index 0000000..7206cb7
--- /dev/null
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ */
+
+#include "compiler.h"
+
+/* This pass promotes reads from uniforms from load/store ops to uniform
+ * registers if it is beneficial to do so. Normally, this saves both
+ * instructions and total register pressure, but it does take a toll on the
+ * number of work registers that are available, so this is a balance.
+ *
+ * To cope, we take as an argument the maximum work register pressure in the
+ * program so we allow that many registers through at minimum, to prevent
+ * spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
+
+void
+midgard_promote_uniforms(compiler_context *ctx, unsigned register_pressure)
+{
+ /* For our purposes, pressure is capped at the number of vec4 work
+ * registers, not live values which would consider spills */
+ register_pressure = MAX2(register_pressure, 16);
+
+ mir_foreach_instr_global_safe(ctx, ins) {
+ if (ins->type != TAG_LOAD_STORE_4) continue;
+ if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
+
+ unsigned lo = ins->load_store.varying_parameters >> 7;
+ unsigned hi = ins->load_store.address;
+
+ /* TODO: Combine fields logically */
+ unsigned address = (hi << 3) | lo;
+
+ /* Check this is UBO 0 */
+ if (ins->load_store.unknown & 0xF) continue;
+
+ /* Check we're accessing directly */
+ if (ins->load_store.unknown != 0x1E00) continue;
+
+ /* Check if it's a promotable range */
+ unsigned uniform_reg = 23 - address;
+
+ if (address > 16) continue;
+ if (register_pressure > uniform_reg) continue;
+
+ /* It is, great! Let's promote */
+
+ ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
+
+ unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
+ mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
+
+ mir_remove_instruction(ins);
+ }
+}