diff options
author | Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> | 2020-10-05 14:48:44 -0700 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-11-04 20:24:48 +0000 |
commit | 296137df53aa0aad78277edbcd48ed09664210a9 (patch) | |
tree | e93f108784f2e7c738bc22fc3a1c48859f78a538 /src/intel/compiler | |
parent | d3d2b73fa3c31b90a622592376585a415e41d3f1 (diff) | |
download | external_mesa3d-296137df53aa0aad78277edbcd48ed09664210a9.tar.gz external_mesa3d-296137df53aa0aad78277edbcd48ed09664210a9.tar.bz2 external_mesa3d-296137df53aa0aad78277edbcd48ed09664210a9.zip |
intel/fs: Implement nir_intrinsic_{load,store}_global_block_intel
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 95 |
1 files changed, 95 insertions, 0 deletions
```diff
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 50b578103dc..79a04fe150f 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4072,6 +4072,36 @@ fs_visitor::swizzle_nir_scratch_addr(const brw::fs_builder &bld,
    return addr;
 }
 
+static unsigned
+choose_oword_block_size_dwords(unsigned dwords)
+{
+   unsigned block;
+   if (dwords >= 32) {
+      block = 32;
+   } else if (dwords >= 16) {
+      block = 16;
+   } else {
+      block = 8;
+   }
+   assert(block <= dwords);
+   return block;
+}
+
+static void
+increment_a64_address(const fs_builder &bld, fs_reg address, uint32_t v)
+{
+   if (bld.shader->devinfo->has_64bit_int) {
+      bld.ADD(address, address, brw_imm_ud(v));
+   } else {
+      fs_reg low = retype(address, BRW_REGISTER_TYPE_UD);
+      fs_reg high = offset(low, bld, 1);
+
+      /* Add low and if that overflows, add carry to high. */
+      bld.ADD(low, low, brw_imm_ud(v))->conditional_mod = BRW_CONDITIONAL_O;
+      bld.ADD(high, high, brw_imm_ud(0x1))->predicate = BRW_PREDICATE_NORMAL;
+   }
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -5326,6 +5356,71 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_global_block_intel: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
+
+      fs_reg address = bld.emit_uniformize(get_nir_src(instr->src[0]));
+
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);
+      const fs_builder ubld8 = bld.exec_all().group(8, 0);
+      const fs_builder ubld16 = bld.exec_all().group(16, 0);
+
+      const unsigned total = instr->num_components * dispatch_width;
+      unsigned loaded = 0;
+
+      while (loaded < total) {
+         const unsigned block =
+            choose_oword_block_size_dwords(total - loaded);
+         const unsigned block_bytes = block * 4;
+
+         const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
+         ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
+                   retype(byte_offset(dest, loaded * 4), BRW_REGISTER_TYPE_UD),
+                   address,
+                   fs_reg(), /* No source data */
+                   brw_imm_ud(block))->size_written = block_bytes;
+
+         increment_a64_address(ubld1, address, block_bytes);
+         loaded += block;
+      }
+
+      assert(loaded == total);
+      break;
+   }
+
+   case nir_intrinsic_store_global_block_intel: {
+      assert(nir_src_bit_size(instr->src[0]) == 32);
+
+      fs_reg address = bld.emit_uniformize(get_nir_src(instr->src[1]));
+      fs_reg src = get_nir_src(instr->src[0]);
+
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);
+      const fs_builder ubld8 = bld.exec_all().group(8, 0);
+      const fs_builder ubld16 = bld.exec_all().group(16, 0);
+
+      const unsigned total = instr->num_components * dispatch_width;
+      unsigned written = 0;
+
+      while (written < total) {
+         const unsigned block =
+            choose_oword_block_size_dwords(total - written);
+
+         const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
+         ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
+                   fs_reg(),
+                   address,
+                   retype(byte_offset(src, written * 4), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(block));
+
+         const unsigned block_bytes = block * 4;
+         increment_a64_address(ubld1, address, block_bytes);
+         written += block;
+      }
+
+      assert(written == total);
+      break;
+   }
+
    default:
       unreachable("unknown intrinsic");
    }
```