summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2016-12-09 09:34:50 -0800
committerEmil Velikov <emil.l.velikov@gmail.com>2016-12-15 16:46:28 +0000
commitfb9f0a1197e10f9b1c727b5b2956f36827308ad1 (patch)
treed2d27d5e8ed04b9be2ae7d0062e45a229ebaf140
parent41c688a6c31ac5b985a3318e082f78103f061977 (diff)
downloadexternal_mesa3d-fb9f0a1197e10f9b1c727b5b2956f36827308ad1.tar.gz
external_mesa3d-fb9f0a1197e10f9b1c727b5b2956f36827308ad1.tar.bz2
external_mesa3d-fb9f0a1197e10f9b1c727b5b2956f36827308ad1.zip
spirv: Use a simpler and more correct implementation of tanh()
The new implementation is more correct because it clamps the incoming value to 10 to avoid floating-point overflow. It also uses a much reduced version of the formula which only requires 1 exp() rather than 2.

This fixes all of the dEQP-VK.glsl.builtin.precision.tanh.* tests.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "13.0" <mesa-dev@lists.freedesktop.org>
(cherry picked from commit da1c49171d0df185545cfbbd600e287f7c6160fa)
-rw-r--r--src/compiler/spirv/vtn_glsl450.c23
1 files changed, 14 insertions, 9 deletions
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index cb0570d385..fbc7ce6fd8 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -565,16 +565,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
build_exp(nb, nir_fneg(nb, src[0]))));
return;
- case GLSLstd450Tanh:
- /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
- val->ssa->def =
- nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
- nir_fsub(nb, build_exp(nb, src[0]),
- build_exp(nb, nir_fneg(nb, src[0])))),
- nir_fmul(nb, nir_imm_float(nb, 0.5f),
- nir_fadd(nb, build_exp(nb, src[0]),
- build_exp(nb, nir_fneg(nb, src[0])))));
+ case GLSLstd450Tanh: {
+ /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
+ *
+ * With a little algebra this reduces to (e^(2x) - 1) / (e^(2x) + 1)
+ *
+ * We clamp x to (-inf, +10] to avoid precision problems. When x > 10,
+ * e^(2x) is so much larger than 1.0 that adding or subtracting 1.0 is
+ * lost to rounding in the computation e^(2x) +/- 1, so it can be ignored.
+ */
+ nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
+ nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
+ val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
+ nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
return;
+ }
case GLSLstd450Asinh:
val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),