about summary refs log tree commit diff stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/fft.asm 4
-rw-r--r-- libavcodec/x86/h264_deblock.asm 19
2 files changed, 9 insertions, 14 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index c87752bae8..e4744a3b60 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -667,13 +667,13 @@ cglobal imdct_calc, 3,5,3
push r1
push r0
%else
- sub rsp, 8
+ sub rsp, 8+32*WIN64 ; allocate win64 shadow space
%endif
call r4
%if ARCH_X86_32
add esp, 12
%else
- add rsp, 8
+ add rsp, 8+32*WIN64
%endif
POP r1
POP r3
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index fc6c983052..6e29ce7373 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -331,16 +331,14 @@ cglobal deblock_v_luma_8, 5,5,10
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX cpuname
-cglobal deblock_h_luma_8, 5,9
+cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
movsxd r7, r1d
lea r8, [r7+r7*2]
lea r6, [r0-4]
lea r5, [r0-4+r8]
%if WIN64
- sub rsp, 0x98
- %define pix_tmp rsp+0x30
+ %define pix_tmp rsp+0x30 ; shadow space + r4
%else
- sub rsp, 0x68
%define pix_tmp rsp
%endif
@@ -379,11 +377,6 @@ cglobal deblock_h_luma_8, 5,9
movq m3, [pix_tmp+0x40]
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
-%if WIN64
- add rsp, 0x98
-%else
- add rsp, 0x68
-%endif
RET
%endmacro
@@ -704,13 +697,16 @@ INIT_MMX cpuname
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_8, 4,9
+cglobal deblock_h_luma_intra_8, 4,9,0,0x80
movsxd r7, r1d
lea r8, [r7*3]
lea r6, [r0-4]
lea r5, [r0-4+r8]
- sub rsp, 0x88
+%if WIN64
+ %define pix_tmp rsp+0x20 ; shadow space
+%else
%define pix_tmp rsp
+%endif
; transpose 8x16 -> tmp space
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
@@ -730,7 +726,6 @@ cglobal deblock_h_luma_intra_8, 4,9
sub r5, r7
shr r7, 3
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
- add rsp, 0x88
RET
%else
cglobal deblock_h_luma_intra_8, 2,4,8,0x80