1 files changed, 37 insertions, 36 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 802aa9f76f..0244f59314 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -76,11 +76,10 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
    case FIXED_GRF:
    case MRF:
    case ATTR:
-      this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
-                                        REG_SIZE);
+      this->size_written = dst.component_size(exec_size);
       break;
    case BAD_FILE:
-      this->regs_written = 0;
+      this->size_written = 0;
       break;
    case IMM:
    case UNIFORM:
@@ -192,7 +191,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
    fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
    fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
                             vec4_result, surf_index, vec4_offset);
-   inst->regs_written = 4 * bld.dispatch_width() / 8;
+   inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
 
    if (type_sz(dst.type) == 8) {
       shuffle_32bit_load_result_to_64bit_data(
@@ -244,7 +243,7 @@ fs_inst::equals(fs_inst *inst) const
 bool
 fs_inst::overwrites_reg(const fs_reg &reg) const
 {
-   return reg.in_range(dst, regs_written);
+   return reg.in_range(dst, DIV_ROUND_UP(size_written, REG_SIZE));
 }
 
 bool
@@ -357,7 +356,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
    if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
       return false;
 
-   if (grf_alloc.sizes[reg.nr] != this->regs_written)
+   if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
       return false;
 
    for (int i = 0; i < this->sources; i++) {
@@ -2548,7 +2547,7 @@ fs_visitor::opt_sampler_eot()
    for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
       if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
          if (!fb_write->src[i].equals(tex_inst->dst) ||
-             fb_write->regs_read(i) != tex_inst->regs_written)
+             fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
          return false;
       } else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
          if (fb_write->src[i].file != BAD_FILE)
@@ -2564,7 +2563,7 @@ fs_visitor::opt_sampler_eot()
    tex_inst->offset |= fb_write->target << 24;
    tex_inst->eot = true;
    tex_inst->dst = ibld.null_reg_ud();
-   tex_inst->regs_written = 0;
+   tex_inst->size_written = 0;
    fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
 
    /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT
@@ -2606,7 +2605,7 @@ fs_visitor::opt_register_renaming()
 
       if (depth == 0 &&
           inst->dst.file == VGRF &&
-          alloc.sizes[inst->dst.nr] == inst->regs_written &&
+          alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
@@ -2730,7 +2729,7 @@ fs_visitor::compute_to_mrf()
       unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                              inst->src[0], inst->regs_read(0) * REG_SIZE)) {
 	    /* Found the last thing to write our reg we want to turn
 	     * into a compute-to-MRF.
@@ -2749,7 +2748,7 @@ fs_visitor::compute_to_mrf()
              * a time.
              */
             if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
-                scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written >
+                scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
                 inst->src[0].offset / REG_SIZE + inst->regs_read(0))
                break;
 
@@ -2768,7 +2767,8 @@ fs_visitor::compute_to_mrf()
 
             /* Clear the bits for any registers this instruction overwrites. */
             regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
             if (!regs_left)
                break;
 	 }
@@ -2793,8 +2793,8 @@ fs_visitor::compute_to_mrf()
 	 if (interfered)
 	    break;
 
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+                             inst->dst, inst->size_written)) {
 	    /* If somebody else writes our MRF here, we can't
 	     * compute-to-MRF before that.
 	     */
@@ -2803,7 +2803,7 @@ fs_visitor::compute_to_mrf()
 
          if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
              regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
-                             inst->dst, inst->regs_written * REG_SIZE)) {
+                             inst->dst, inst->size_written)) {
 	    /* Found a SEND instruction, which means that there are
 	     * live values in MRFs from base_mrf to base_mrf +
 	     * scan_inst->mlen - 1.  Don't go pushing our MRF write up
@@ -2822,11 +2822,12 @@ fs_visitor::compute_to_mrf()
       regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-         if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+         if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                              inst->src[0], inst->regs_read(0) * REG_SIZE)) {
             /* Clear the bits for any registers this instruction overwrites. */
             regs_left &= ~mask_relative_to(
-               inst->src[0], scan_inst->dst, scan_inst->regs_written);
+               inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+                                                          REG_SIZE));
 
             const unsigned rel_offset = (reg_offset(scan_inst->dst) -
                                          reg_offset(inst->src[0])) / REG_SIZE;
@@ -2841,7 +2842,7 @@ fs_visitor::compute_to_mrf()
                /* Clear the COMPR4 bit if the generating instruction is not
                 * compressed.
                 */
-               if (scan_inst->regs_written < 2)
+               if (scan_inst->size_written < 2 * REG_SIZE)
                   scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
 
             } else {
@@ -3024,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes()
       /* Clear out any MRF move records whose sources got overwritten. */
       for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
          if (last_mrf_move[i] &&
-             regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+             regions_overlap(inst->dst, inst->size_written,
                              last_mrf_move[i]->src[0],
                              last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
             last_mrf_move[i] = NULL;
@@ -4603,7 +4604,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     * which is the one that is going to limit the overall execution size of
     * the instruction due to this rule.
     */
-   unsigned reg_count = inst->regs_written;
+   unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
 
    for (unsigned i = 0; i < inst->sources; i++)
       reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
@@ -4630,13 +4631,14 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     */
    if (devinfo->gen < 8) {
       for (unsigned i = 0; i < inst->sources; i++) {
-         if (inst->regs_written == 2 &&
+         if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
              inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
              !is_uniform(inst->src[i]) &&
              !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
-               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1))
-            max_width = MIN2(max_width, inst->exec_size /
-                             inst->regs_written);
+               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
+            const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
+            max_width = MIN2(max_width, inst->exec_size / reg_count);
+         }
       }
    }
 
@@ -4681,9 +4683,10 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     * In this situation we calculate the maximum size of the split
     * instructions so they only ever write to a single register.
     */
-   if (devinfo->gen < 8 && inst->regs_written > 1 &&
+   if (devinfo->gen < 8 && inst->size_written > REG_SIZE &&
        !inst->force_writemask_all) {
-      const unsigned channels_per_grf = inst->exec_size / inst->regs_written;
+      const unsigned channels_per_grf = inst->exec_size /
+         DIV_ROUND_UP(inst->size_written, REG_SIZE);
       unsigned exec_type_size = 0;
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file != BAD_FILE)
@@ -5087,8 +5090,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
     * the results of multiple lowered instructions in order to make sure that
     * they end up arranged correctly in the original destination region.
     */
-   if (inst->regs_written * REG_SIZE >
-       inst->dst.component_size(inst->exec_size))
+   if (inst->size_written > inst->dst.component_size(inst->exec_size))
       return true;
 
    /* If the lowered execution size is larger than the original the result of
@@ -5111,7 +5113,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
        * group which could cause one of the lowered instructions to overwrite
        * the data read from the same source by other lowered instructions.
        */
-      if (regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+      if (regions_overlap(inst->dst, inst->size_written,
                           inst->src[i], inst->regs_read(i) * REG_SIZE) &&
           !inst->dst.equals(inst->src[i]))
         return true;
@@ -5138,8 +5140,8 @@ emit_zip(const fs_builder &lbld, bblock_t *block, fs_inst *inst)
 
    /* Specified channel group from the destination region. */
    const fs_reg dst = horiz_offset(inst->dst, lbld.group());
-   const unsigned dst_size = inst->regs_written * REG_SIZE /
-            inst->dst.component_size(inst->exec_size);
+   const unsigned dst_size = inst->size_written /
+      inst->dst.component_size(inst->exec_size);
 
    if (needs_dst_copy(lbld, inst)) {
       const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
@@ -5191,7 +5193,7 @@ fs_visitor::lower_simd_width()
           * original or the lowered instruction, whichever is lower.
           */
          const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
-         const unsigned dst_size = inst->regs_written * REG_SIZE /
+         const unsigned dst_size = inst->size_written /
             inst->dst.component_size(inst->exec_size);
 
          assert(!inst->writes_accumulator && !inst->mlen);
@@ -5215,9 +5217,8 @@ fs_visitor::lower_simd_width()
                split_inst.src[j] = emit_unzip(lbld, block, inst, j);
 
             split_inst.dst = emit_zip(lbld, block, inst);
-            split_inst.regs_written = DIV_ROUND_UP(
-               split_inst.dst.component_size(lower_width) * dst_size,
-               REG_SIZE);
+            split_inst.size_written =
+               split_inst.dst.component_size(lower_width) * dst_size;
 
             lbld.emit(split_inst);
          }
@@ -5314,7 +5315,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    switch (inst->dst.file) {
    case VGRF:
       fprintf(file, "vgrf%d", inst->dst.nr);
-      if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
+      if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
           inst->dst.offset % REG_SIZE)
          fprintf(file, "+%d.%d",
                  inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);