author      Rob Clark <robdclark@chromium.org>     2020-11-13 11:48:57 -0800
committer   Marge Bot <eric+marge@anholt.net>      2020-11-13 22:44:04 +0000
commit      cf9ef90066592333a9113d49328be29583308a60 (patch)
tree        559ac58652ec0e3da25f4427863c241f5e78d097
parent      4bb5a6c30a79f9a2d95b7c61addac7caecff9219 (diff)
freedreno/ir3: Add pass to deal with load_uniform base offsets
With indirect load_uniform, we can only encode 10b of constant base
offset. This pass detects problematic cases and peels out the high
bits of the base offset.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7612>
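The arithmetic behind the fix is small enough to sketch directly. The helper below is illustrative C only (the names ENCODABLE_BASE_LIMIT and split_base_offset are made up here; the actual pass rewrites load_uniform intrinsics in NIR): keep the low 10 bits as the encodable base offset and peel out the high bits so they can be added to the indirect offset instead.

   /* Sketch of the split described above: keep only the low 10 bits as the
    * instruction's base offset and fold the remaining high bits into the
    * indirect (non-constant) offset.  Illustrative only; the real pass
    * operates on nir_intrinsic_load_uniform instructions.
    */
   #define ENCODABLE_BASE_LIMIT (1u << 10)   /* 10 bits of base offset */

   static unsigned
   split_base_offset(unsigned base_offset, unsigned *peeled_high_bits)
   {
      unsigned new_base = base_offset % ENCODABLE_BASE_LIMIT; /* fits in 10 bits */
      *peeled_high_bits = base_offset - new_base;             /* added to the indirect offset */
      return new_base;
   }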
-rw-r--r--   src/freedreno/ir3/ir3_nir.c                      7
-rw-r--r--   src/freedreno/ir3/ir3_nir.h                      1
-rw-r--r--   src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c  73
3 files changed, 81 insertions, 0 deletions
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index dfb5f29cc8f..29ab29691e0 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -511,6 +511,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    if (progress)
       ir3_optimize_loop(s);
 
+   /* Fixup indirect load_uniform's which end up with a const base offset
+    * which is too large to encode.  Do this late(ish) so we actually
+    * can differentiate indirect vs non-indirect.
+    */
+   if (OPT(s, ir3_nir_fixup_load_uniform))
+      ir3_optimize_loop(s);
+
    /* Do late algebraic optimization to turn add(a, neg(b)) back into
     * subs, then the mandatory cleanup after algebraic.  Note that it may
     * produce fnegs, and if so then we need to keep running to squash
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index a6ec1440e9b..d716e530493 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -61,6 +61,7 @@ void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
       struct ir3_const_state *const_state);
 void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
 bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
+bool ir3_nir_fixup_load_uniform(nir_shader *nir);
 
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index de62271ccb1..8e7f9aa29d1 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -457,3 +457,76 @@ ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
 
    return progress;
 }
+
+
+static bool
+fixup_load_uniform_filter(const nir_instr *instr, const void *arg)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+   return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_uniform;
+}
+
+static nir_ssa_def *
+fixup_load_uniform_instr(struct nir_builder *b, nir_instr *instr, void *arg)
+{
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   /* We don't need to worry about the non-indirect case: */
+   if (nir_src_is_const(intr->src[0]))
+      return NULL;
+
+   const unsigned base_offset_limit = (1 << 10); /* 10 bits */
+   unsigned base_offset = nir_intrinsic_base(intr);
+
+   /* Or cases where the base offset is lower than the hw limit: */
+   if (base_offset < base_offset_limit)
+      return NULL;
+
+   b->cursor = nir_before_instr(instr);
+
+   nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[0], 1);
+
+   /* We'd like to avoid a sequence like:
+    *
+    *    vec4 32 ssa_18 = intrinsic load_uniform (ssa_4) (1024, 0, 0)
+    *    vec4 32 ssa_19 = intrinsic load_uniform (ssa_4) (1072, 0, 0)
+    *    vec4 32 ssa_20 = intrinsic load_uniform (ssa_4) (1120, 0, 0)
+    *
+    * from turning into a unique offset value (which requires reloading
+    * a0.x for each instruction).  So instead of just adding the constant
+    * base_offset to the non-const offset, be a bit more clever and only
+    * extract the part that cannot be encoded.  Afterwards CSE should
+    * turn the result into:
+    *
+    *    vec1 32 ssa_5  = load_const (1024)
+    *    vec4 32 ssa_6  = iadd ssa_4, ssa_5
+    *    vec4 32 ssa_18 = intrinsic load_uniform (ssa_6) (0, 0, 0)
+    *    vec4 32 ssa_19 = intrinsic load_uniform (ssa_6) (48, 0, 0)
+    *    vec4 32 ssa_20 = intrinsic load_uniform (ssa_6) (96, 0, 0)
+    */
+   unsigned new_base_offset = base_offset % base_offset_limit;
+
+   nir_intrinsic_set_base(intr, new_base_offset);
+   offset = nir_iadd_imm(b, offset, base_offset - new_base_offset);
+
+   nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(offset));
+
+   return NIR_LOWER_INSTR_PROGRESS;
+}
+
+/**
+ * For relative CONST file access, we can only encode 10b worth of fixed offset,
+ * so in cases where the base offset is larger, we need to peel it out into
+ * ALU instructions.
+ *
+ * This should run late, after constant folding has had a chance to do its
+ * thing, so we can actually know if it is an indirect uniform offset or not.
+ */
+bool
+ir3_nir_fixup_load_uniform(nir_shader *nir)
+{
+   return nir_shader_lower_instructions(nir,
+         fixup_load_uniform_filter, fixup_load_uniform_instr,
+         NULL);
+}
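For a concrete feel of the example in the comment above, here is a hypothetical walk-through (plain C, not part of the patch) of the three base offsets 1024, 1072 and 1120: each keeps only its low 10 bits in the instruction and peels out the same 1024, which is why CSE can then share a single iadd (and a single a0.x reload).

   /* Hypothetical walk-through of the bases 1024, 1072 and 1120 from the
    * comment above.  Every base splits into the same peeled constant (1024),
    * so CSE can share one iadd and one a0.x reload across the three loads.
    */
   #include <stdio.h>

   int main(void)
   {
      const unsigned limit = 1u << 10;
      const unsigned bases[] = { 1024, 1072, 1120 };

      for (int i = 0; i < 3; i++) {
         unsigned new_base = bases[i] % limit;    /* 0, 48, 96 */
         unsigned peeled   = bases[i] - new_base; /* 1024 in every case */
         printf("base %u -> new base %u, peeled %u\n", bases[i], new_base, peeled);
      }
      return 0;
   }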