aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJose Maria Casanova Crespo <jmcasanova@igalia.com>2019-07-09 17:23:25 (GMT)
committerJose Maria Casanova Crespo <jmcasanova@igalia.com>2019-07-22 01:00:50 (GMT)
commit9bf0bdf7762afef04b923c043242356ccd3de092 (patch)
tree5340a62499bad7414d34b7df7ece67fe64b27158
parentc341ab7ffbac822d3d3cbb3d3ae9d2a19ea3cc9a (diff)
downloadexternal_mesa3d-9bf0bdf7762afef04b923c043242356ccd3de092.zip
external_mesa3d-9bf0bdf7762afef04b923c043242356ccd3de092.tar.gz
external_mesa3d-9bf0bdf7762afef04b923c043242356ccd3de092.tar.bz2
v3d: Avoid scheduling an instruction that stalls waiting for SFU retval
If we detect that a scheduling candidate will stall because having a register source that is the written by the SFU unit in the previous instruction we reduce its priority so any non stalling operation would be chosen. The latency of SFU operations is defined as 2. So they would be scheduled earlier if other candidates have the same priority. Finally we won't merge instructions that stall to a previously chosen one. As the result of the previous one would be waiting for an extra cycle. Although shader-db result show that instruction are hurt with an increase of 0.35% the sum of instructions + stalls is reduced a 0.52%. And the total of sfu-stalls is reduced a 63.51%. It implies also a small increase in the max-temps metric because of scheduling earlier SFU operations. total instructions in shared programs: 9102719 -> 9117851 (0.17%) instructions in affected programs: 4324628 -> 4339760 (0.35%) helped: 4162 HURT: 12128 helped stats (abs) min: 1 max: 10 x̄: 1.28 x̃: 1 helped stats (rel) min: 0.09% max: 4.76% x̄: 0.66% x̃: 0.51% HURT stats (abs) min: 1 max: 27 x̄: 1.69 x̃: 1 HURT stats (rel) min: 0.05% max: 7.69% x̄: 0.87% x̃: 0.68% 95% mean confidence interval for instructions value: 0.90 0.96 95% mean confidence interval for instructions %-change: 0.47% 0.50% Instructions are HURT. total max-temps in shared programs: 1327728 -> 1327812 (<.01%) max-temps in affected programs: 4730 -> 4814 (1.78%) helped: 61 HURT: 134 helped stats (abs) min: 1 max: 2 x̄: 1.08 x̃: 1 helped stats (rel) min: 2.70% max: 13.33% x̄: 4.89% x̃: 4.17% HURT stats (abs) min: 1 max: 3 x̄: 1.12 x̃: 1 HURT stats (rel) min: 1.54% max: 20.00% x̄: 6.10% x̃: 5.26% 95% mean confidence interval for max-temps value: 0.28 0.58 95% mean confidence interval for max-temps %-change: 1.80% 3.52% Max-temps are HURT. total sfu-stalls in shared programs: 99551 -> 36324 (-63.51%) sfu-stalls in affected programs: 95029 -> 31802 (-66.53%) helped: 25882 HURT: 0 helped stats (abs) min: 1 max: 27 x̄: 2.44 x̃: 2 helped stats (rel) min: 5.26% max: 100.00% x̄: 79.86% x̃: 100.00% 95% mean confidence interval for sfu-stalls value: -2.47 -2.42 95% mean confidence interval for sfu-stalls %-change: -80.18% -79.54% Sfu-stalls are helped. total inst-and-stalls in shared programs: 9202270 -> 9154175 (-0.52%) inst-and-stalls in affected programs: 5618516 -> 5570421 (-0.86%) helped: 22728 HURT: 855 helped stats (abs) min: 1 max: 31 x̄: 2.16 x̃: 1 helped stats (rel) min: 0.07% max: 16.67% x̄: 1.14% x̃: 0.92% HURT stats (abs) min: 1 max: 5 x̄: 1.25 x̃: 1 HURT stats (rel) min: 0.12% max: 5.26% x̄: 1.24% x̃: 0.86% 95% mean confidence interval for inst-and-stalls value: -2.07 -2.01 95% mean confidence interval for inst-and-stalls %-change: -1.07% -1.05% Inst-and-stalls are helped. v2: Rename v3d_qpu_generates_sfu_stalls to v3d_qpu_instr_is_sfu (Eric) Reviewed-by: Eric Anholt <eric@anholt.net>
-rw-r--r--src/broadcom/compiler/qpu_schedule.c27
1 files changed, 23 insertions, 4 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 370881b..c15218e 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -560,6 +560,11 @@ mux_read_stalls(struct choose_scoreboard *scoreboard,
scoreboard->last_stallable_sfu_reg);
}
+/* We define a max schedule priority to allow negative priorities as result of
+ * substracting this max when an instruction stalls. So instructions that
+ * stall have lower priority than regular instructions. */
+#define MAX_SCHEDULE_PRIORITY 16
+
static int
get_instruction_priority(const struct v3d_qpu_instr *inst)
{
@@ -578,10 +583,6 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
return next_score;
next_score++;
- /* XXX perf: We should schedule SFU ALU ops so that the reader is 2
- * instructions after the producer if possible, not just 1.
- */
-
/* Default score for things that aren't otherwise special. */
baseline_score = next_score;
next_score++;
@@ -591,6 +592,9 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
return next_score;
next_score++;
+ /* We should increase the maximum if we assert here */
+ assert(next_score < MAX_SCHEDULE_PRIORITY);
+
return baseline_score;
}
@@ -845,6 +849,18 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
int prio = get_instruction_priority(inst);
+ if (mux_read_stalls(scoreboard, inst)) {
+ /* Don't merge an instruction that stalls */
+ if (prev_inst)
+ continue;
+ else {
+ /* Any instruction that don't stall will have
+ * higher scheduling priority */
+ prio -= MAX_SCHEDULE_PRIORITY;
+ assert(prio < 0);
+ }
+ }
+
/* Found a valid instruction. If nothing better comes along,
* this one works.
*/
@@ -1005,6 +1021,9 @@ instruction_latency(struct schedule_node *before, struct schedule_node *after)
after_inst));
}
+ if (v3d_qpu_instr_is_sfu(before_inst))
+ return 2;
+
return latency;
}