author    Martin Storsjo <martin@martin.st>      2015-06-10 12:05:14 +0300
committer Marco Nelissen <marcone@google.com>    2015-06-25 08:25:46 -0700
commit    9f81a0a2024d1aa640e15085717a8164f770eba4 (patch)
tree      4095089abfaace92958b0013b636f09cfbae3374 /common
parent    436fccb1641f9f25afff6cf20f9d4957c08f43cd (diff)
armv8: Remove redundant NEON element size declarations
When specifying one specific lane of a vector, the number of lanes doesn't need to be specified. The clang built-in assembler doesn't allow the redundant declarations, while binutils gas works fine with both forms.

Change-Id: I86077ce0774d4594a1295b6860e4944df87dde2f
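As a minimal illustrative fragment (instructions taken from the hunks below, shown side by side for clarity), both spellings denote the same lane operation; binutils gas accepts either, while clang's integrated assembler accepts only the form without the lane-count prefix:

    // redundant form: lane count spelled out; rejected by clang's integrated assembler
    mov  v12.2s[0], w7        // move w7 into 32-bit lane 0 of v12
    dup  v30.8h, v22.4h[1]    // broadcast 16-bit lane 1 of v22 across v30

    // equivalent form accepted by both assemblers: the element size alone identifies the lane
    mov  v12.s[0], w7
    dup  v30.8h, v22.h[1]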
Diffstat (limited to 'common')
-rw-r--r--  common/armv8/ih264_deblk_chroma_av8.s                          |  2
-rw-r--r--  common/armv8/ih264_deblk_luma_av8.s                            |  8
-rw-r--r--  common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s   | 24
-rw-r--r--  common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s   | 28
-rw-r--r--  common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s   | 24
-rw-r--r--  common/armv8/ih264_intra_pred_chroma_av8.s                     |  6
-rw-r--r--  common/armv8/ih264_intra_pred_luma_16x16_av8.s                 |  2
-rw-r--r--  common/armv8/ih264_intra_pred_luma_8x8_av8.s                   |  6
-rw-r--r--  common/armv8/ih264_resi_trans_quant_av8.s                      |  2
-rw-r--r--  common/armv8/ih264_weighted_bi_pred_av8.s                      | 56
-rw-r--r--  common/armv8/ih264_weighted_pred_av8.s                         | 28
11 files changed, 93 insertions(+), 93 deletions(-)
diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s
index 3021556..a4dbd23 100644
--- a/common/armv8/ih264_deblk_chroma_av8.s
+++ b/common/armv8/ih264_deblk_chroma_av8.s
@@ -337,7 +337,7 @@ ih264_deblk_chroma_horz_bslt4_av8:
ldr x9, [sp, #80]
sub x0, x0, x1, lsl #1 //x0 = uc_edgePixelU pointing to p1 of chroma U
rev w7, w7 //
- mov v12.2s[0], w7 //D12[0] = ui_Bs
+ mov v12.s[0], w7 //D12[0] = ui_Bs
ld1 {v16.s}[0], [x8] //D16[0] contains cliptab_cb
ld1 {v17.s}[0], [x9] //D17[0] contains cliptab_cr
ld2 {v6.8b, v7.8b}, [x0], x1 //Q3=p1
diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s
index bcdb03f..1b3950d 100644
--- a/common/armv8/ih264_deblk_luma_av8.s
+++ b/common/armv8/ih264_deblk_luma_av8.s
@@ -97,7 +97,7 @@ ih264_deblk_luma_horz_bslt4_av8:
sub x0, x0, x1 //x0 pointer to p2
rev w4, w4 //
ld1 {v10.8b, v11.8b}, [x0], x1 //p2 values are loaded into q5
- mov v12.2s[0], w4 //d12[0] = ui_Bs
+ mov v12.s[0], w4 //d12[0] = ui_Bs
mov x6, x0 //keeping backup of pointer to p1
ld1 {v8.8b, v9.8b}, [x0], x1 //p1 values are loaded into q4
mov x7, x0 //keeping backup of pointer to p0
@@ -364,8 +364,8 @@ ih264_deblk_luma_horz_bs4_av8:
mov v26.d[1] , v27.d[0]
mov v2.d[1] , v3.d[0]
uaddl v16.8h, v31.8b, v25.8b //p2+p3 H
- mla v12.8h, v8.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 L
- mla v4.8h, v16.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 H
+ mla v12.8h, v8.8h , v1.h[0] //(p0+q0+p1)+3*p2+2*p3 L
+ mla v4.8h, v16.8h , v1.h[0] //(p0+q0+p1)+3*p2+2*p3 H
bic v16.16b, v20.16b , v18.16b //((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
mov v17.d[0] , v16.d[1] //&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
bit v2.16b, v28.16b , v20.16b //choosing between po' and p0"
@@ -443,7 +443,7 @@ ih264_deblk_luma_vert_bslt4_av8:
ld1 {v4.8b}, [x0], x1 //row3
rev w12, w12 //reversing ui_bs
ld1 {v6.8b}, [x0], x1 //row4
- mov v18.2s[0], w12 //d12[0] = ui_Bs
+ mov v18.s[0], w12 //d12[0] = ui_Bs
ld1 {v16.s}[0], [x14] //D16[0] contains cliptab
ld1 {v8.8b}, [x0], x1 //row5
uxtl v18.8h, v18.8b //q6 = uc_Bs in each 16 bt scalar
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
index 202c516..d2897b6 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -146,7 +146,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -174,7 +174,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
uaddl v22.8h, v4.8b, v10.8b
ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0]
@@ -228,7 +228,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -253,7 +253,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
uaddl v22.8h, v6.8b, v0.8b
@@ -306,7 +306,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -334,7 +334,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
@@ -387,7 +387,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -427,7 +427,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
@@ -501,7 +501,7 @@ loop_8:
ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
uqxtn v25.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v25.2s[1], v13.2s[0]
+ mov v25.s[1], v13.s[0]
uaddl v16.8h, v8.8b, v10.8b
@@ -535,7 +535,7 @@ loop_8:
uaddl v28.8h, v9.8b, v11.8b
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
uaddl v14.8h, v5.8b, v3.8b
@@ -576,7 +576,7 @@ loop_8:
mls v16.8h, v30.8h , v24.8h
uqxtn v27.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v27.2s[1], v13.2s[0]
+ mov v27.s[1], v13.s[0]
ext v22.16b, v28.16b , v16.16b , #10
@@ -616,7 +616,7 @@ loop_8:
subs x4, x4, #4
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
mov v0.16b, v8.16b
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
index 38f971b..546c807 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
@@ -275,7 +275,7 @@ loop_16_lowhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
@@ -313,7 +313,7 @@ loop_16_lowhalf:
uaddl v2.8h, v1.8b, v4.8b
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter
@@ -355,7 +355,7 @@ loop_16_lowhalf:
mls v28.8h, v2.8h , v24.8h
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
saddl2 v6.4s, v12.8h, v28.8h
@@ -384,7 +384,7 @@ loop_16_lowhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
mov v13.16b, v9.16b
@@ -523,7 +523,7 @@ loop_16_highhalf:
mls v20.8h, v2.8h , v24.8h
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x8], x2
urhadd v26.8b, v18.8b , v26.8b
@@ -558,7 +558,7 @@ loop_16_highhalf:
uaddl v2.8h, v1.8b, v4.8b
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
ld1 {v0.2s, v1.2s}, [x8], x2
@@ -598,7 +598,7 @@ loop_16_highhalf:
mls v28.8h, v2.8h , v24.8h
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
@@ -627,7 +627,7 @@ loop_16_highhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
mov v13.16b, v9.16b
@@ -768,7 +768,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
@@ -812,7 +812,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
@@ -855,7 +855,7 @@ loop_8:
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
saddl2 v6.4s, v12.8h, v28.8h
@@ -885,7 +885,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
@@ -1024,7 +1024,7 @@ loop_4:
sqrshrun v9.8b, v6.8h, #5
sqrshrun v7.8b, v7.8h, #5
- mov v9.2s[1], v7.2s[0]
+ mov v9.s[1], v7.s[0]
ext v20.8b, v18.8b , v19.8b , #2
@@ -1089,7 +1089,7 @@ loop_4:
sqrshrun v10.8b, v8.8h, #5
sqrshrun v9.8b, v9.8h, #5
- mov v10.2s[1], v9.2s[0]
+ mov v10.s[1], v9.s[0]
mov v12.8b, v28.8b
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
index b1e4866..3f3e297 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
@@ -209,7 +209,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -238,7 +238,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.2s}, [x1]
sqrshrun v20.8b, v20.8h, #5
@@ -297,7 +297,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -323,7 +323,7 @@ loop_16:
ld1 {v22.4s}, [x6], x7
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.4s}, [x1]
sqrshrun v20.8b, v20.8h, #5
sqrshrun v21.8b, v22.8h, #5
@@ -380,7 +380,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -409,7 +409,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.2s}, [x1]
sqrshrun v20.8b, v20.8h, #5
@@ -466,7 +466,7 @@ loop_16:
ld1 {v22.4s}, [x9], #16
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -506,7 +506,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v20.4s}, [x6], #16
ld1 {v22.4s}, [x6], x7
@@ -586,7 +586,7 @@ loop_8:
ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
uqxtn v25.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v25.2s[1], v13.2s[0]
+ mov v25.s[1], v13.s[0]
uaddl v16.8h, v8.8b, v10.8b
@@ -620,7 +620,7 @@ loop_8:
uaddl v28.8h, v9.8b, v11.8b
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
urhadd v12.16b, v12.16b , v14.16b
urhadd v13.16b, v13.16b , v15.16b
@@ -662,7 +662,7 @@ loop_8:
mls v16.8h, v30.8h , v24.8h
uqxtn v27.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v27.2s[1], v13.2s[0]
+ mov v27.s[1], v13.s[0]
sqrshrun v14.8b, v14.8h, #5
ext v22.16b, v28.16b , v16.16b , #10
@@ -702,7 +702,7 @@ loop_8:
subs x4, x4, #4
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
urhadd v12.16b, v12.16b , v14.16b
urhadd v13.16b, v13.16b , v15.16b
diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s
index 2c5efb3..8f0f282 100644
--- a/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -501,7 +501,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
add v16.8h, v0.8h , v16.8h
dup v20.8h, v22.h[0]
mul v4.8h, v6.8h , v20.8h
- dup v30.8h, v22.4h[1]
+ dup v30.8h, v22.h[1]
mul v18.8h, v6.8h , v20.8h
mul v14.8h, v6.8h , v30.8h
mul v8.8h, v6.8h , v30.8h
@@ -511,7 +511,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
sqrshrun v28.8b, v24.8h, #5
add v26.8h, v16.8h , v8.8h
sqrshrun v29.8b, v0.8h, #5
- dup v20.8h, v22.4h[2]
+ dup v20.8h, v22.h[2]
st1 {v28.8b, v29.8b}, [x1], x3
sqrshrun v28.8b, v2.8h, #5
sqrshrun v29.8b, v26.8h, #5
@@ -520,7 +520,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
st1 {v28.8b, v29.8b}, [x1], x3
add v24.8h, v12.8h , v4.8h
add v0.8h, v16.8h , v18.8h
- dup v30.8h, v22.4h[3]
+ dup v30.8h, v22.h[3]
sqrshrun v28.8b, v24.8h, #5
sqrshrun v29.8b, v0.8h, #5
mul v14.8h, v6.8h , v30.8h
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index a9eb165..c1847b5 100644
--- a/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -467,7 +467,7 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
ldrb w5, [x7], #-1
sxtw x5, w5
add x8, x8, x8, lsl #1
- dup v4.8h, v0.4h[0]
+ dup v4.8h, v0.h[0]
add x12, x12, x8
ldrb w9, [x0], #1
sxtw x9, w9
diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index 2b972ca..bf9a4c1 100644
--- a/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -337,7 +337,7 @@ ih264_intra_pred_luma_8x8_mode_dc_av8:
uaddlp v3.2s, v1.4h
uaddlp v2.1d, v3.2s
dup v10.8h, w5
- dup v8.8h, v2.4h[0]
+ dup v8.8h, v2.h[0]
add v12.8h, v8.8h , v10.8h
sqrshrun v31.8b, v12.8h, #4
st1 {v31.8b}, [x1], x3
@@ -360,7 +360,7 @@ top_available: // ONLT TOP AVAILABLE
uaddlp v13.2s, v14.4h
uaddlp v12.1d, v13.2s
rshrn v4.8b, v12.8h, #3
- dup v31.8b, v4.8b[0]
+ dup v31.8b, v4.b[0]
st1 {v31.8b}, [x1], x3
st1 {v31.8b}, [x1], x3
st1 {v31.8b}, [x1], x3
@@ -1059,7 +1059,7 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
mov v30.16b, v4.16b
mov v31.16b, v6.16b
tbl v12.8b, {v30.16b, v31.16b}, v10.8b
- dup v14.16b, v5.8b[7] //
+ dup v14.16b, v5.b[7] //
tbl v13.8b, {v30.16b, v31.16b}, v11.8b
mov v12.d[1], v13.d[0]
ext v16.16b, v12.16b , v14.16b , #2
diff --git a/common/armv8/ih264_resi_trans_quant_av8.s b/common/armv8/ih264_resi_trans_quant_av8.s
index dc1c680..316c220 100644
--- a/common/armv8/ih264_resi_trans_quant_av8.s
+++ b/common/armv8/ih264_resi_trans_quant_av8.s
@@ -665,7 +665,7 @@ ih264_hadamard_quant_2x2_uv_av8:
ld2 {v0.4h-v1.4h}, [x0] //load src
ld1 {v30.h}[0], [x2] //load pu2_scale_matrix[0]
- dup v30.4h, v30.4h[0] //pu2_scale_matrix
+ dup v30.4h, v30.h[0] //pu2_scale_matrix
uxtl v30.4s, v30.4h //pu2_scale_matrix
neg w4, w4
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
index 96ef50a..b039fba 100644
--- a/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -173,10 +173,10 @@ loop_4: //each iteration processes four rows
ld1 {v10.s}[1], [x1], x4 //load row 4 in source 2
uxtl v8.8h, v8.8b //converting rows 3,4 in source 1 to 16-bit
uxtl v10.8h, v10.8b //converting rows 3,4 in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for rows 1,2
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for rows 1,2
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for rows 3,4
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for rows 3,4
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for rows 1,2
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for rows 1,2
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for rows 3,4
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for rows 3,4
subs w11, w11, #4 //decrement ht by 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from rows 3,4
@@ -205,18 +205,18 @@ loop_8: //each iteration processes four rows
ld1 {v18.8b}, [x1], x4 //load row 4 in source 2
uxtl v8.8h, v8.8b //converting row 2 in source 1 to 16-bit
uxtl v10.8h, v10.8b //converting row 2 in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for row 1
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for row 1
uxtl v12.8h, v12.8b //converting row 3 in source 1 to 16-bit
uxtl v14.8h, v14.8b //converting row 3 in source 2 to 16-bit
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for row 2
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for row 2
uxtl v16.8h, v16.8b //converting row 4 in source 1 to 16-bit
uxtl v18.8h, v18.8b //converting row 4 in source 2 to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3
- mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3
- mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4
- mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4
+ mul v12.8h, v12.8h , v2.h[0] //weight 1 mult. for row 3
+ mla v12.8h, v14.8h , v2.h[2] //weight 2 mult. for row 3
+ mul v16.8h, v16.8h , v2.h[0] //weight 1 mult. for row 4
+ mla v16.8h, v18.8h , v2.h[2] //weight 2 mult. for row 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 2
srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 3
@@ -251,35 +251,35 @@ loop_16: //each iteration processes two rows
ld1 {v18.8b, v19.8b}, [x1], x4 //load row 4 in source 2
uxtl v4.8h, v5.8b //converting row 1H in source 1 to 16-bit
uxtl v6.8h, v7.8b //converting row 1H in source 2 to 16-bit
- mul v20.8h, v20.8h , v2.4h[0] //weight 1 mult. for row 1L
- mla v20.8h, v22.8h , v2.4h[2] //weight 2 mult. for row 1L
+ mul v20.8h, v20.8h , v2.h[0] //weight 1 mult. for row 1L
+ mla v20.8h, v22.8h , v2.h[2] //weight 2 mult. for row 1L
uxtl v24.8h, v8.8b //converting row 2L in source 1 to 16-bit
uxtl v26.8h, v10.8b //converting row 2L in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1H
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1H
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for row 1H
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for row 1H
uxtl v8.8h, v9.8b //converting row 2H in source 1 to 16-bit
uxtl v10.8h, v11.8b //converting row 2H in source 2 to 16-bit
- mul v24.8h, v24.8h , v2.4h[0] //weight 1 mult. for row 2L
- mla v24.8h, v26.8h , v2.4h[2] //weight 2 mult. for row 2L
+ mul v24.8h, v24.8h , v2.h[0] //weight 1 mult. for row 2L
+ mla v24.8h, v26.8h , v2.h[2] //weight 2 mult. for row 2L
uxtl v28.8h, v12.8b //converting row 3L in source 1 to 16-bit
uxtl v30.8h, v14.8b //converting row 3L in source 2 to 16-bit
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2H
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2H
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for row 2H
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for row 2H
uxtl v12.8h, v13.8b //converting row 3H in source 1 to 16-bit
uxtl v14.8h, v15.8b //converting row 3H in source 2 to 16-bit
- mul v28.8h, v28.8h , v2.4h[0] //weight 1 mult. for row 3L
- mla v28.8h, v30.8h , v2.4h[2] //weight 2 mult. for row 3L
+ mul v28.8h, v28.8h , v2.h[0] //weight 1 mult. for row 3L
+ mla v28.8h, v30.8h , v2.h[2] //weight 2 mult. for row 3L
uxtl v22.8h, v16.8b //converting row 4L in source 1 to 16-bit
uxtl v6.8h, v18.8b //converting row 4L in source 2 to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3H
- mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3H
+ mul v12.8h, v12.8h , v2.h[0] //weight 1 mult. for row 3H
+ mla v12.8h, v14.8h , v2.h[2] //weight 2 mult. for row 3H
uxtl v16.8h, v17.8b //converting row 4H in source 1 to 16-bit
uxtl v18.8h, v19.8b //converting row 4H in source 2 to 16-bit
- mul v22.8h, v22.8h , v2.4h[0] //weight 1 mult. for row 4L
- mla v22.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 4L
+ mul v22.8h, v22.8h , v2.h[0] //weight 1 mult. for row 4L
+ mla v22.8h, v6.8h , v2.h[2] //weight 2 mult. for row 4L
srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 1L
- mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4H
- mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4H
+ mul v16.8h, v16.8h , v2.h[0] //weight 1 mult. for row 4H
+ mla v16.8h, v18.8h , v2.h[2] //weight 2 mult. for row 4H
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1H
srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 2L
saddw v20.8h, v20.8h , v3.8b //adding offset for row 1L
diff --git a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
index ec5bb7a..69ed3b0 100644
--- a/common/armv8/ih264_weighted_pred_av8.s
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -143,8 +143,8 @@ loop_4: //each iteration processes four rows
uxtl v4.8h, v4.8b //converting rows 1,2 to 16-bit
uxtl v6.8h, v6.8b //converting rows 3,4 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight mult. for rows 1,2
- mul v6.8h, v6.8h , v2.4h[0] //weight mult. for rows 3,4
+ mul v4.8h, v4.8h , v2.h[0] //weight mult. for rows 1,2
+ mul v6.8h, v6.8h , v2.h[0] //weight mult. for rows 3,4
subs w7, w7, #4 //decrement ht by 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
@@ -175,11 +175,11 @@ loop_8: //each iteration processes four rows
uxtl v6.8h, v6.8b //converting row 2 to 16-bit
uxtl v8.8h, v8.8b //converting row 3 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight mult. for row 1
+ mul v4.8h, v4.8h , v2.h[0] //weight mult. for row 1
uxtl v10.8h, v10.8b //converting row 4 to 16-bit
- mul v6.8h, v6.8h , v2.4h[0] //weight mult. for row 2
- mul v8.8h, v8.8h , v2.4h[0] //weight mult. for row 3
- mul v10.8h, v10.8h , v2.4h[0] //weight mult. for row 4
+ mul v6.8h, v6.8h , v2.h[0] //weight mult. for row 2
+ mul v8.8h, v8.8h , v2.h[0] //weight mult. for row 3
+ mul v10.8h, v10.8h , v2.h[0] //weight mult. for row 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from row 2
@@ -214,20 +214,20 @@ loop_16: //each iteration processes two rows
uxtl v14.8h, v5.8b //converting row 1H to 16-bit
ld1 {v10.8b, v11.8b}, [x0], x2 //load row 4 in source
uxtl v16.8h, v6.8b //converting row 2L to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight mult. for row 1L
+ mul v12.8h, v12.8h , v2.h[0] //weight mult. for row 1L
uxtl v18.8h, v7.8b //converting row 2H to 16-bit
- mul v14.8h, v14.8h , v2.4h[0] //weight mult. for row 1H
+ mul v14.8h, v14.8h , v2.h[0] //weight mult. for row 1H
uxtl v20.8h, v8.8b //converting row 3L to 16-bit
- mul v16.8h, v16.8h , v2.4h[0] //weight mult. for row 2L
+ mul v16.8h, v16.8h , v2.h[0] //weight mult. for row 2L
uxtl v22.8h, v9.8b //converting row 3H to 16-bit
- mul v18.8h, v18.8h , v2.4h[0] //weight mult. for row 2H
+ mul v18.8h, v18.8h , v2.h[0] //weight mult. for row 2H
uxtl v24.8h, v10.8b //converting row 4L to 16-bit
- mul v20.8h, v20.8h , v2.4h[0] //weight mult. for row 3L
+ mul v20.8h, v20.8h , v2.h[0] //weight mult. for row 3L
uxtl v26.8h, v11.8b //converting row 4H to 16-bit
- mul v22.8h, v22.8h , v2.4h[0] //weight mult. for row 3H
- mul v24.8h, v24.8h , v2.4h[0] //weight mult. for row 4L
+ mul v22.8h, v22.8h , v2.h[0] //weight mult. for row 3H
+ mul v24.8h, v24.8h , v2.h[0] //weight mult. for row 4L
srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 1L
- mul v26.8h, v26.8h , v2.4h[0] //weight mult. for row 4H
+ mul v26.8h, v26.8h , v2.h[0] //weight mult. for row 4H
srshl v14.8h, v14.8h , v0.8h //rounds off the weighted samples from row 1H
srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 2L
saddw v12.8h, v12.8h , v3.8b //adding offset for row 1L