summary | refs | log | tree | commit | diff | stats
path: root/common/arm
diff options
context:
space:
mode:
Diffstat (limited to 'common/arm')
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_arm_memory_barrier.s | 3
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_deblk_chroma_a9.s | 48
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_deblk_luma_a9.s | 24
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_default_weighted_pred_a9q.s | 9
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_ihadamard_scaling_a9.s | 14
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_chroma_a9q.s | 18
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_filters_luma_horz_a9q.s | 21
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_filters_luma_vert_a9q.s | 18
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_bilinear_a9q.s | 22
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_copy_a9q.s | 10
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s | 14
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s | 16
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s | 20
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s | 14
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s | 11
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s | 16
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_intra_pred_chroma_a9q.s | 32
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_intra_pred_luma_16x16_a9q.s | 42
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_intra_pred_luma_4x4_a9q.s | 55
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_intra_pred_luma_8x8_a9q.s | 59
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_iquant_itrans_recon_a9.s | 16
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_iquant_itrans_recon_dc_a9.s | 17
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_itrans_recon_a9.s | 8
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_mem_fns_neon.s | 14
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_padding_neon.s | 25
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_platform_macros.h | 0
-rwxr-xr-x common/arm/ih264_resi_trans_a9.s | 604
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_resi_trans_quant_a9.s | 2
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_weighted_bi_pred_a9q.s | 8
-rw-r--r--[-rwxr-xr-x] common/arm/ih264_weighted_pred_a9q.s | 8
30 files changed, 273 insertions, 895 deletions
diff --git a/common/arm/ih264_arm_memory_barrier.s b/common/arm/ih264_arm_memory_barrier.s
index 523218f..3816409 100755..100644
--- a/common/arm/ih264_arm_memory_barrier.s
+++ b/common/arm/ih264_arm_memory_barrier.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@*******************************************************************************
@* @file
@* ih264_arm_memory_barrier.s
@@ -39,7 +39,6 @@
.text
.p2align 2
-
@*****************************************************************************
@*
@* Function Name : ih264_arm_dsb
diff --git a/common/arm/ih264_deblk_chroma_a9.s b/common/arm/ih264_deblk_chroma_a9.s
index 66102a7..8c9960a 100755..100644
--- a/common/arm/ih264_deblk_chroma_a9.s
+++ b/common/arm/ih264_deblk_chroma_a9.s
@@ -54,7 +54,7 @@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -84,7 +84,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bs4_bp_a9
@@ -130,7 +130,7 @@ ih264_deblk_chroma_horz_bs4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -160,7 +160,7 @@ ih264_deblk_chroma_horz_bs4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_bp_a9
@@ -224,7 +224,7 @@ ih264_deblk_chroma_vert_bs4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -260,7 +260,7 @@ ih264_deblk_chroma_vert_bs4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bslt4_bp_a9
@@ -326,7 +326,7 @@ ih264_deblk_chroma_horz_bslt4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -362,7 +362,7 @@ ih264_deblk_chroma_horz_bslt4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_bp_a9
@@ -465,7 +465,7 @@ ih264_deblk_chroma_vert_bslt4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -495,7 +495,7 @@ ih264_deblk_chroma_vert_bslt4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_mbaff_bp_a9
@@ -543,7 +543,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -579,7 +579,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9
@@ -656,7 +656,7 @@ ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -692,7 +692,7 @@ ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bs4_a9
@@ -743,7 +743,7 @@ ih264_deblk_chroma_horz_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -779,7 +779,7 @@ ih264_deblk_chroma_horz_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_a9
@@ -848,7 +848,7 @@ ih264_deblk_chroma_vert_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -893,7 +893,7 @@ ih264_deblk_chroma_vert_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bslt4_a9
@@ -968,7 +968,7 @@ ih264_deblk_chroma_horz_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1013,7 +1013,7 @@ ih264_deblk_chroma_horz_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_a9
@@ -1119,7 +1119,7 @@ ih264_deblk_chroma_vert_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1155,7 +1155,7 @@ ih264_deblk_chroma_vert_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_mbaff_a9
@@ -1206,7 +1206,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1251,7 +1251,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_mbaff_a9
diff --git a/common/arm/ih264_deblk_luma_a9.s b/common/arm/ih264_deblk_luma_a9.s
index 3e6a4d9..9217ed2 100755..100644
--- a/common/arm/ih264_deblk_luma_a9.s
+++ b/common/arm/ih264_deblk_luma_a9.s
@@ -47,7 +47,7 @@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -83,7 +83,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_horz_bslt4_a9
@@ -187,7 +187,7 @@ ih264_deblk_luma_horz_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -217,7 +217,7 @@ ih264_deblk_luma_horz_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_horz_bs4_a9
@@ -353,7 +353,7 @@ ih264_deblk_luma_horz_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -389,7 +389,7 @@ ih264_deblk_luma_horz_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bslt4_a9
@@ -574,7 +574,7 @@ ih264_deblk_luma_vert_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -604,7 +604,7 @@ ih264_deblk_luma_vert_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bs4_a9
@@ -800,7 +800,7 @@ ih264_deblk_luma_vert_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -830,7 +830,7 @@ ih264_deblk_luma_vert_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bs4_mbaff_a9
@@ -942,7 +942,7 @@ ih264_deblk_luma_vert_bs4_mbaff_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -978,7 +978,7 @@ ih264_deblk_luma_vert_bs4_mbaff_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bslt4_mbaff_a9
diff --git a/common/arm/ih264_default_weighted_pred_a9q.s b/common/arm/ih264_default_weighted_pred_a9q.s
index 94cda46..a4688f2 100755..100644
--- a/common/arm/ih264_default_weighted_pred_a9q.s
+++ b/common/arm/ih264_default_weighted_pred_a9q.s
@@ -17,14 +17,13 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_default_weighted_pred_a9q.s
@*
@* @brief
@* Contains function definitions for default weighted prediction.
-@* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
@*
@* @author
@* Kaushik Senthoor R
@@ -38,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_default_weighted_pred_luma_a9q()
@@ -82,7 +81,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_default_weighted_pred_luma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
@@ -256,7 +255,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_default_weighted_pred_chroma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_ihadamard_scaling_a9.s b/common/arm/ih264_ihadamard_scaling_a9.s
index 687099a..c7feddd 100755..100644
--- a/common/arm/ih264_ihadamard_scaling_a9.s
+++ b/common/arm/ih264_ihadamard_scaling_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_ihadamard_scaling_a9.s
@@ -37,7 +37,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
@ * of a 16x16 intra prediction macroblock, and then performs scaling.
@ * prediction buffer
@@ -69,10 +69,10 @@
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
@ WORD16* pi2_out,
@ const UWORD16 *pu2_iscal_mat,
@@ -161,7 +161,7 @@ ih264_ihadamard_scaling_4x4_a9:
@ *******************************************************************************
-@ */
+@ *
@ * @brief This function performs a 2x2 inverse hadamard transform for chroma block
@ *
@ * @par Description:
@@ -189,10 +189,10 @@ ih264_ihadamard_scaling_4x4_a9:
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
@ WORD16* pi2_out,
@ const UWORD16 *pu2_iscal_mat,
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
index afd2860..6681a7c 100755..100644
--- a/common/arm/ih264_inter_pred_chroma_a9q.s
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_chroma_a9q.s
@@ -36,16 +36,16 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -85,7 +85,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_chroma(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -112,8 +112,6 @@
ih264_inter_pred_chroma_a9q:
-
-
stmfd sp!, {r4-r12, r14} @store register values to stack
vstmdb sp!, {d8-d15} @push neon registers to stack
ldr r4, [sp, #104]
diff --git a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
index ea6bba0..62b4b94 100755..100644
--- a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_a9q.s
@@ -36,13 +36,13 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -76,7 +76,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_horz (
@ UWORD8 *pu1_src,
@@ -102,6 +102,9 @@
ih264_inter_pred_luma_horz_a9q:
+
+
+
stmfd sp!, {r4-r12, r14} @store register values to stack
vstmdb sp!, {d8-d15} @push neon registers to stack
ldr r5, [sp, #104] @Loads ht
@@ -116,7 +119,7 @@ ih264_inter_pred_luma_horz_a9q:
beq loop_4
loop_16: @when wd=16
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 ;for checking loop
vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
@@ -173,7 +176,7 @@ loop_16: @when wd=16
b loop_16 @ loop if height == 8 or 16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.8 {d5, d6}, [r0], r2 @// Load row1
vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
vld1.8 {d2, d3}, [r0], r2 @// Load row0
@@ -204,7 +207,7 @@ loop_8:
beq end_func @ Branch if height==4
- b loop_8 @looping if height =8 or 16
+ b loop_8 @looping if height =8 or 16
loop_4:
vld1.8 {d5, d6}, [r0], r2 @// Load row1
diff --git a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
index 5b29e02..65c40a6 100755..100644
--- a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_vert_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -195,10 +195,10 @@ loop_16: @when wd=16
subne r0, r0, r2
beq end_func @ Branch if height==4
- b loop_16 @ looping if height = 8 or 16
+ b loop_16 @ looping if height = 8 or 16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0, [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1, [r0], r2 @ Vector load from src[1_0]
@@ -248,7 +248,7 @@ loop_8:
loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0]
diff --git a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
index 6a3c83d..8f049f8 100755..100644
--- a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_bilinear_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@ *******************************************************************************
@ * function:ih264_inter_pred_luma_bilinear
@ *
@@ -89,7 +89,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@@ -192,7 +192,7 @@ loop_16: @when wd=16
subs r12, r6, #8
vst1.8 {q15}, [r2], r5 @//Store dest row7
- beq end_func @ end function if ht=8
+ beq end_func @ end function if ht=8
vld1.8 {q0}, [r0], r3 @// Load row8 ;src1
vaddl.u8 q10, d0, d4
@@ -275,7 +275,7 @@ loop_8: @wd=8;
vqrshrun.s16 d31, q13, #1
subs r12, r6, #4
vst1.8 {d31}, [r2], r5 @//Store dest row3
- beq end_func @ end function if ht=4
+ beq end_func @ end function if ht=4
vld1.8 {d12}, [r1], r4 @// Load row4 ;src2
vld1.8 {d8}, [r0], r3 @// Load row4 ;src1
@@ -298,7 +298,7 @@ loop_8: @wd=8;
vqrshrun.s16 d31, q11, #1
subs r12, r6, #8
vst1.8 {d31}, [r2], r5 @//Store dest row7
- beq end_func @ end function if ht=8
+ beq end_func @ end function if ht=8
vld1.8 {d0}, [r0], r3 @// Load row8 ;src1
vld1.8 {d4}, [r1], r4 @// Load row8 ;src2
@@ -367,7 +367,7 @@ loop_4:
vqrshrun.s16 d31, q13, #1
subs r12, r6, #4
vst1.32 d31[0], [r2], r5 @//Store dest row3
- beq end_func @ end function if ht=4
+ beq end_func @ end function if ht=4
vld1.32 d12[0], [r1], r4 @// Load row4 ;src2
vld1.32 d8[0], [r0], r3 @// Load row4 ;src1
diff --git a/common/arm/ih264_inter_pred_luma_copy_a9q.s b/common/arm/ih264_inter_pred_luma_copy_a9q.s
index 8ba2fbf..c0b0568 100755..100644
--- a/common/arm/ih264_inter_pred_luma_copy_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_copy_a9q.s
@@ -17,8 +17,8 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -53,7 +53,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_copy (
@ UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -182,7 +182,7 @@ end_inner_loop_wd_16:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
-@ /*
+@ *
@ ********************************************************************************
@ *
@ * @brief This function copies a 4x4 block to destination
@@ -208,7 +208,7 @@ end_inner_loop_wd_16:
@ * Currently wd and height is not used, ie a 4x4 block is always copied
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_interleave_copy(WORD16 *pi2_src,
@ UWORD8 *pu1_out,
@ WORD32 pred_strd,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
index 43321a8..54183f0 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -88,7 +88,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
index 65a6de7..c8edf38 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -91,7 +91,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -835,7 +835,7 @@ loop_8:
vmov q7, q14
vst1.32 d30, [r1], r3 @ store row 3
- bgt loop_8 @if height =8 or 16 loop
+ bgt loop_8 @if height =8 or 16 loop
b end_func
loop_4_start:
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
index c39ae01..ab1d1d1 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -30,19 +30,19 @@
@*
@* @par List of Functions:
@*
-@* - ih264_inter_pred_luma_horz_qpe_a9ql()
+@* - ih264_inter_pred_luma_horz_qpel_a9q()
@*
@* @remarks
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -79,7 +79,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_horz (
@ UWORD8 *pu1_src,
@@ -126,7 +126,7 @@ ih264_inter_pred_luma_horz_qpel_a9q:
beq loop_4
loop_16: @when wd=16
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0
vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
@@ -187,7 +187,7 @@ loop_16: @when wd=16
b loop_16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.8 {d5, d6}, [r0], r2 @// Load row1
vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
@@ -221,7 +221,7 @@ loop_8:
subs r5, r5, #2 @ 2 rows done, decrement by 2
beq end_func @ Branch if height==4
- b loop_8 @looping if height == 8 or 16
+ b loop_8 @looping if height == 8 or 16
loop_4:
vld1.8 {d5, d6}, [r0], r2 @// Load row1
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
index 565cc80..3c63ca3 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -91,7 +91,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
index 3c8b60a..cfe03a0 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -36,14 +36,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
@*******************************************************************************
@*
@* @brief
@@ -90,7 +87,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
index d45055e..e2c68ef 100755..100644
--- a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -36,13 +36,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
@*******************************************************************************
@*
@* @brief
@@ -79,7 +77,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_vert (
@ UWORD8 *pu1_src,
@@ -211,12 +209,12 @@ loop_16: @when wd=16
subne r0, r0, r2
beq end_func @ Branch if height==4
- b loop_16 @ looping if height = 8 or 16
+ b loop_16 @ looping if height = 8 or 16
loop_8:
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.u32 d0, [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1, [r0], r2 @ Vector load from src[1_0]
vld1.u32 d2, [r0], r2 @ Vector load from src[2_0]
@@ -270,7 +268,7 @@ loop_8:
b loop_8 @looping if height == 8 or 16
loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0]
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
index d03fc55..ccd5c0d 100755..100644
--- a/common/arm/ih264_intra_pred_chroma_a9q.s
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_chroma_a9q.s
@@ -39,15 +39,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+@* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
@
-@/**
-@/**
-@/**
-@
.text
.p2align 2
@@ -60,7 +56,7 @@ scratch_chroma_intrapred_addr1:
scratch_intrapred_chroma_plane_addr1:
.long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_dc
@@ -91,7 +87,7 @@ scratch_intrapred_chroma_plane_addr1:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -105,8 +101,6 @@ scratch_intrapred_chroma_plane_addr1:
@ r3 => dst_strd
@ r4 => ui_neighboravailability
-
-
.global ih264_intra_pred_chroma_8x8_mode_dc_a9q
ih264_intra_pred_chroma_8x8_mode_dc_a9q:
@@ -191,10 +185,10 @@ str_pred:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_horz
@@ -226,7 +220,7 @@ str_pred:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -270,7 +264,7 @@ loop_8x8_horz:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_vert
@@ -339,10 +333,10 @@ ih264_intra_pred_chroma_8x8_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_plane
@@ -373,7 +367,7 @@ ih264_intra_pred_chroma_8x8_mode_vert_a9q:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -393,7 +387,6 @@ ih264_intra_pred_chroma_8x8_mode_plane_a9q:
stmfd sp!, {r4-r10, r12, lr}
vpush {d8-d15}
-
vld1.32 d0, [r0]
add r10, r0, #10
vld1.32 d1, [r10]
@@ -542,7 +535,6 @@ scrlblc2:
end_func_plane:
-
vpop {d8-d15}
ldmfd sp!, {r4-r10, r12, pc}
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
index e38e203..0dd82f3 100755..100644
--- a/common/arm/ih264_intra_pred_luma_16x16_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_16x16_a9q.s
@@ -39,14 +39,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@
.text
@@ -57,10 +57,10 @@
.hidden ih264_gai1_intrapred_luma_plane_coeffs
scratch_intrapred_addr1:
.long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_vert_a9q
+@*ih264_intra_pred_luma_16x16_mode_vert
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:vertical
@@ -135,13 +135,13 @@ ih264_intra_pred_luma_16x16_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_horz_a9q
+@*ih264_intra_pred_luma_16x16_mode_horz
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:horizontal
@@ -170,7 +170,7 @@ ih264_intra_pred_luma_16x16_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -213,13 +213,13 @@ loop_16x16_horz:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_dc_a9q
+@*ih264_intra_pred_luma_16x16_mode_dc
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:DC
@@ -247,7 +247,7 @@ loop_16x16_horz:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -300,7 +300,7 @@ top_available: @ONLY TOP AVAILABLE
vdup.u8 q0, d0[0]
b str_pred
-left_available: @ONLY LEFT AVAILABLE
+left_available: @ONLY LEFT AVAILABLE
vld1.u8 {q0}, [r0]
vpaddl.u8 q0, q0
vadd.u16 d0, d0, d1
@@ -337,13 +337,13 @@ str_pred:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_plane_a9q
+@*ih264_intra_pred_luma_16x16_mode_plane
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:PLANE
@@ -371,7 +371,7 @@ str_pred:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_intra_pred_luma_4x4_a9q.s b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
index cb386ea..5cc7e23 100755..100644
--- a/common/arm/ih264_intra_pred_luma_4x4_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_4x4_a9q.s
@@ -44,21 +44,16 @@
@* None
@*
@*******************************************************************************
-@*/
-
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
-@
+@*
-@/**
-@/**
-@/**
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert
@@ -128,10 +123,10 @@ ih264_intra_pred_luma_4x4_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz
@@ -163,7 +158,7 @@ ih264_intra_pred_luma_4x4_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -210,10 +205,10 @@ ih264_intra_pred_luma_4x4_mode_horz_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_dc
@@ -244,7 +239,7 @@ ih264_intra_pred_luma_4x4_mode_horz_a9q:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -352,7 +347,7 @@ end_func:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_diag_dl
@@ -383,7 +378,7 @@ end_func:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -434,7 +429,7 @@ end_func_diag_dl:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_diag_dr
@@ -465,7 +460,7 @@ end_func_diag_dl:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -514,7 +509,7 @@ end_func_diag_dr:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert_r
@@ -545,7 +540,7 @@ end_func_diag_dr:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -596,7 +591,7 @@ end_func_vert_r:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz_d
@@ -627,7 +622,7 @@ end_func_vert_r:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -659,7 +654,7 @@ ih264_intra_pred_luma_4x4_mode_horz_d_a9q:
vqrshrun.s16 d5, q12, #2
sub r5, r3, #2
vmov.8 d6, d5
- vtrn.8 d4, d5 @
+ vtrn.8 d4, d5 @
vst1.u16 {d5[1]}, [r1]!
vst1.16 {d6[2]}, [r1], r5
vst1.u16 {d4[1]}, [r1]!
@@ -678,7 +673,7 @@ end_func_horz_d:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert_l
@@ -709,7 +704,7 @@ end_func_horz_d:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -759,7 +754,7 @@ end_func_vert_l:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz_u
@@ -790,7 +785,7 @@ end_func_vert_l:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -825,9 +820,9 @@ ih264_intra_pred_luma_4x4_mode_horz_u_a9q:
vext.8 d6, d5, d4, #1
vst1.8 {d4[2]}, [r1]!
vst1.8 {d6[0]}, [r1]!
- vtrn.8 d6, d5 @
+ vtrn.8 d6, d5 @
sub r5, r3, #2
- vtrn.8 d4, d6 @
+ vtrn.8 d4, d6 @
vdup.8 d7, r9
vst1.16 {d6[0]}, [r1], r5
vst1.16 {d6[0]}, [r1]!
diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
index 6da1c95..352d29d 100755..100644
--- a/common/arm/ih264_intra_pred_luma_8x8_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_8x8_a9q.s
@@ -45,17 +45,11 @@
@* None
@*
@*******************************************************************************
-@*/
-
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
-@
+@*
-@/**
-@/**
-@/**
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
-
.text
.p2align 2
@@ -64,7 +58,7 @@
scratch_intrapred_addr_8x8:
.long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_ref_filtering
@@ -95,7 +89,7 @@ scratch_intrapred_addr_8x8:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst)
@@ -111,7 +105,6 @@ ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
stmfd sp!, {r4-r12, r14} @store register values to stack
vpush {d8-d15}
-
vld1.u8 {q0}, [r0]! @
vld1.u8 {q1}, [r0]
add r0, r0, #8 @
@@ -141,6 +134,7 @@ ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
end_func_ref_filt:
+
vpop {d8-d15}
ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
@@ -149,7 +143,7 @@ end_func_ref_filt:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert
@@ -219,10 +213,10 @@ ih264_intra_pred_luma_8x8_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz
@@ -254,7 +248,7 @@ ih264_intra_pred_luma_8x8_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -299,10 +293,10 @@ loop_8x8_horz:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_dc
@@ -333,7 +327,7 @@ loop_8x8_horz:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -413,7 +407,7 @@ str_pred:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dl
@@ -444,7 +438,7 @@ str_pred:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -506,7 +500,7 @@ end_func_diag_dl:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dr
@@ -537,7 +531,7 @@ end_func_diag_dl:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -597,7 +591,7 @@ end_func_diag_dr:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_r
@@ -628,7 +622,7 @@ end_func_diag_dr:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -717,7 +711,7 @@ end_func_vert_r:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_d
@@ -748,7 +742,7 @@ end_func_vert_r:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -791,7 +785,7 @@ ih264_intra_pred_luma_8x8_mode_horz_d_a9q:
vmov.8 q4, q2
vmov.8 q5, q3
sub r6, r3, #6
- vtrn.8 q4, q5 @
+ vtrn.8 q4, q5 @
vmov.8 q6, q4
vmov.8 q7, q5
sub r5, r3, #4
@@ -835,7 +829,7 @@ end_func_horz_d:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_l
@@ -866,7 +860,7 @@ end_func_horz_d:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -887,6 +881,7 @@ ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
stmfd sp!, {r4-r12, r14} @Restoring registers from stack
vpush {d8-d15}
+
add r0, r0, #9
vld1.u8 {q0}, [r0]
add r0, r0, #1
@@ -935,7 +930,7 @@ end_func_vert_l:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_u
@@ -966,7 +961,7 @@ end_func_vert_l:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s
index f71ca69..4e49f6a 100755..100644
--- a/common/arm/ih264_iquant_itrans_recon_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_iquant_itrans_recon_a9.s
@@ -38,8 +38,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -82,7 +82,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -225,7 +225,7 @@ ih264_iquant_itrans_recon_4x4_a9:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
- @/**
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -268,7 +268,7 @@ ih264_iquant_itrans_recon_4x4_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -416,7 +416,7 @@ ih264_iquant_itrans_recon_chroma_4x4_a9:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
-@/*
+@*
@ *******************************************************************************
@ *
@ * @brief
@@ -459,7 +459,7 @@ ih264_iquant_itrans_recon_chroma_4x4_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
index 8d71bdb..97c4724 100755..100644
--- a/common/arm/ih264_iquant_itrans_recon_dc_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_iquant_itrans_recon_dc_a9.s
@@ -37,8 +37,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -83,7 +83,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -167,7 +167,7 @@ ih264_iquant_itrans_recon_4x4_dc_a9:
-@/*
+@*
@ *******************************************************************************
@ *
@ * @brief
@@ -212,7 +212,7 @@ ih264_iquant_itrans_recon_4x4_dc_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -300,7 +300,7 @@ ih264_iquant_itrans_recon_8x8_dc_a9:
ldmfd sp!, {r4-r8, r15}
-@ /*
+@ *
@ ********************************************************************************
@ *
@ * @brief This function reconstructs a 4x4 sub block from quantized resiude and
@@ -328,7 +328,7 @@ ih264_iquant_itrans_recon_8x8_dc_a9:
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -368,6 +368,7 @@ ih264_iquant_itrans_recon_chroma_4x4_dc_a9:
vmov.u16 q15, #0x00ff
+
vld1.u8 d18, [r2], r0 @load out [8 bit size) -8 coeffs
vaddw.u8 q1, q0, d2 @Add pred
vld1.u8 d19, [r2], r0
diff --git a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s
index 1d74da5..769d5d7 100755..100644
--- a/common/arm/ih264_itrans_recon_a9.s
+++ b/common/arm/ih264_itrans_recon_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_itrans_recon_neon_a9.s
@@ -33,8 +33,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -72,7 +72,7 @@
@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_itrans_recon_4x4(
@ WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s
index 2808897..39ad9b3 100755..100644
--- a/common/arm/ih264_mem_fns_neon.s
+++ b/common/arm/ih264_mem_fns_neon.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_mem_fns_neon.s
@@ -40,9 +40,9 @@
@ * None
@ *
@ *******************************************************************************
-@*/
+@*
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -65,7 +65,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
@ UWORD8 num_bytes)
@@ -94,7 +94,7 @@ loop_neon_memcpy_mul_8:
@*******************************************************************************
-@*/
+@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
@ UWORD8 num_bytes)
@@ -143,6 +143,8 @@ loop_memcpy:
+
+
.global ih264_memset_mul_8_a9q
ih264_memset_mul_8_a9q:
@@ -208,6 +210,8 @@ loop_memset:
+
+
.global ih264_memset_16bit_mul_8_a9q
ih264_memset_16bit_mul_8_a9q:
diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s
index 9bab268..e7a1f91 100755..100644
--- a/common/arm/ih264_padding_neon.s
+++ b/common/arm/ih264_padding_neon.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@*
@ *******************************************************************************
@ * @file
@ * ih264_padding_neon.s
@@ -39,10 +39,10 @@
@ * None
@ *
@ *******************************************************************************
-@*/
+@*
-@/**
+@**
@*******************************************************************************
@*
@* @brief pad at the top of a 2d array
@@ -67,7 +67,7 @@
@* @remarks none
@*
@*******************************************************************************
-@*/
+@*
@void ih264_pad_top(UWORD8 *pu1_src,
@ WORD32 src_strd,
@ WORD32 wd,
@@ -110,7 +110,7 @@ loop_neon_pad_top:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -147,7 +147,7 @@ loop_neon_pad_top:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_LEFT_LUMA == C
@void ih264_pad_left_luma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -160,6 +160,7 @@ loop_neon_pad_top:
@ r3 => pad_size
+
.global ih264_pad_left_luma_a9q
ih264_pad_left_luma_a9q:
@@ -245,7 +246,7 @@ end_func:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -282,7 +283,7 @@ end_func:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_LEFT_CHROMA == C
@void ih264_pad_left_chroma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -373,7 +374,7 @@ end_func_l_c:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -410,7 +411,7 @@ end_func_l_c:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_RIGHT_LUMA == C
@void ih264_pad_right_luma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -519,7 +520,7 @@ end_func_r:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -556,7 +557,7 @@ end_func_r:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_RIGHT_CHROMA == C
@void ih264_pad_right_chroma(UWORD8 *pu1_src,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_platform_macros.h b/common/arm/ih264_platform_macros.h
index 1f67403..1f67403 100755..100644
--- a/common/arm/ih264_platform_macros.h
+++ b/common/arm/ih264_platform_macros.h
diff --git a/common/arm/ih264_resi_trans_a9.s b/common/arm/ih264_resi_trans_a9.s
deleted file mode 100755
index 08821f5..0000000
--- a/common/arm/ih264_resi_trans_a9.s
+++ /dev/null
@@ -1,604 +0,0 @@
-@/******************************************************************************
-@ *
-@ * Copyright (C) 2015 The Android Open Source Project
-@ *
-@ * Licensed under the Apache License, Version 2.0 (the "License");
-@ * you may not use this file except in compliance with the License.
-@ * You may obtain a copy of the License at:
-@ *
-@ * http://www.apache.org/licenses/LICENSE-2.0
-@ *
-@ * Unless required by applicable law or agreed to in writing, software
-@ * distributed under the License is distributed on an "AS IS" BASIS,
-@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ * See the License for the specific language governing permissions and
-@ * limitations under the License.
-@ *
-@ *****************************************************************************
-@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-@*/
-@/**
-@*******************************************************************************
-@* @file
-@* ih264_resi_trans_a9.s
-@*
-@* @brief
-@* Contains function definitions for residual and forward trans
-@*
-@* @author
-@* Ittiam
-@*
-@* @par List of Functions:
-@* ih264_resi_trans_4x4_a9
-@* ih264_resi_trans_8x8_a9
-@* @remarks
-@* None
-@*
-@*******************************************************************************
-
-
-.text
-.p2align 2
-@*****************************************************************************
-@*
-@* Function Name : ih264_resi_trans_4x4_a9
-@* Description : Computes the 4x4 residual (src - pred), applies the H264 cf4 forward transform and multiplies each coefficient by the matching entry of g_scal_coff_h264_4x4; results are stored as 32 bit values
-@*
-@* Arguments :
-@ R0 :pointer to src buffer
-@ R1 :pointer to pred buffer
-@ R2 :pointer to dst buffer
-@ R3 :src_stride
-@ STACK :pred_stride, dst_stride (read from the first two stacked argument words)
-
-@* Values Returned : NONE
-@*
-@* Register Usage : r0-r6, q0-q7, q10, q12-q15
-@* Stack Usage : 40 bytes (r4-r12 and lr are pushed)
-@* Cycles : Around
-@* Interruptibility : Interruptible
-@*
-@* Known Limitations
-@* \Assumptions :
-@*
-@* Revision History :
-@* DD MM YYYY Author(s) Changes
-@* 30 12 2009 100633 First version
-@*
-@*****************************************************************************
-
-
- .global ih264_resi_trans_4x4_a9
- .extern g_scal_coff_h264_4x4
-g_scal_coff_h264_4x4_addr:
- .long g_scal_coff_h264_4x4 - 4x4lbl - 8 @ offset of the table from 4x4lbl; the -8 assumes pc reads as 4x4lbl + 8 (ARM state)
-
-ih264_resi_trans_4x4_a9:
-
- @R0 :pointer to src buffer
- @R1 :pointer to pred buffer
- @R2 :pointer to dst buffer
- @R3 :src_stride
- @STACK :pred_stride,dst_stride
-
- push {r4-r12, lr} @save r4-r12 and lr (10 registers = 40 bytes)
- @NOTE(review): q4-q7 (d8-d15) are overwritten below without being saved; AAPCS lists d8-d15 as callee-saved - confirm
- mov r6, sp
- add r6, r6, #40 @step over the 40 bytes just pushed so that r6 points at the stacked arguments
- ldmfd r6, {r4-r5} @load the strides into registers
- @R4 pred_stride
- @R5 dst_stride
-
-
- @the strides are used as post-increments on the vld1/vst1 instructions below
- @an earlier idea was to treat the stride as the gap from the end of row 1 to the start of row 2,
- @which would require adding the size of the current row (4 bytes) to each stride:
- @ADD R3,#4
- @ADD R4,#4
- @ADD R5,#4
- @NOTE(review): the original remark here said the dst stride counts 16 bit units and needs +4 then *2;
- @the code below instead shifts dst_stride left by 2 for 32 bit outputs - treat the old remark as stale
- @--------------------function loading done------------------------
-
- @let us find the residual
- @data is like 1a -> d0[1:31] d0[32:64]
- @ a b c d # # # #
- vld1.u8 d30, [r0], r3 @load 8 bytes of src row 1 (only the first 4 pixels are used); r0 += src_stride
- vld1.u8 d31, [r1], r4 @load 8 bytes of pred row 1 (only the first 4 pixels are used); r1 += pred_stride
- @ data is like 1a -> q4[1:63] q4[64:148]
- @ d8[1:63] d9[1:63]
- @ a b c d # # # #
-
- vld1.u8 d28, [r0], r3 @load row 2 of src into d28
- vld1.u8 d29, [r1], r4 @load row 2 of pred into d29
-
- vld1.u8 d26, [r0], r3 @load row 3 of src into d26
- vsubl.u8 q0, d30, d31 @q0 = src - pred for row 1, widened to 16 bit
-
- vld1.u8 d27, [r1], r4 @load row 3 of pred into d27
- vsubl.u8 q1, d28, d29 @q1 = src - pred for row 2
-
- vld1.u8 d24, [r0], r3 @load row 4 of src into d24
-
- vld1.u8 d25, [r1], r4 @load row 4 of pred into d25
- vsubl.u8 q2, d26, d27 @q2 = src - pred for row 3
-
- lsl r5, r5, #2 @ multiply dst stride by 4 since we are storing 32 bit values
- ldr r6, g_scal_coff_h264_4x4_addr @ r6 = stored offset of the scaling table
-4x4lbl:
- add r6, r6, pc @ r6 = address of the global array (stored offset + pc)
-
- vsubl.u8 q3, d24, d25 @q3 = src - pred for row 4
-
- @after this (only the low half of each q register is used from here on)
- @D0 -> 1a
- @D2 -> 2a
- @D4 -> 3a
- @D6 -> 4a
-
- @transpose the matrix so that we can do the horizontal transform first
- @#1 #2 #3 #4
- @a b c d ---- D0
- @e f g h -----D2
- @i j k l -----D4
- @m n o p -----D6
- @transpose the inner 2x2 blocks
- vtrn.16 d0, d2
- vld1.s16 {q10}, [r6]! @ q10 = scaling values 0-7; r6 advances by 16 bytes
- vtrn.16 d4, d6
- @a e c g
- @b f d h
- @i m k o
- @j n l p
- vtrn.32 d0, d4
- vtrn.32 d2, d6
- @a e i m #1 -- D0 --- x4
- @b f j n #2 -- D2 --- x5
- @c g k o #3 -- D4 ----x6
- @d h l p #4 -- D6 ----x7
-
- @we have loaded the residuals into the registers, now we need to add and subtract them
- @let us do the horiz transform first
-
- vsub.s16 d5, d2, d4 @x2 = x5-x6
- vsub.s16 d7, d0, d6 @x3 = x4-x7;
-
- vadd.s16 d3, d2, d4 @x1 = x5+x6
- vadd.s16 d1, d0, d6 @x0 = x4+x7
-
-
- vshl.s16 d31, d7, #1 @ d31 = U_SHIFT(x3,1,shft)
- vshl.s16 d30, d5, #1 @ d30 = U_SHIFT(x2,1,shft)
-
- vadd.s16 d0, d1, d3 @x0 + x1;
- vsub.s16 d4, d1, d3 @x0 - x1;
-
- vadd.s16 d2, d31, d5 @U_SHIFT(x3,1,shft) + x2;
- vsub.s16 d6, d7, d30 @x3 - U_SHIFT(x2,1,shft);
-
- @transpose again so that the vertical transform can reuse the same code
- vtrn.16 d0, d2
- vtrn.16 d4, d6
-
- vtrn.32 d0, d4
- vtrn.32 d2, d6
-
- @let us do vertical transform
- @same code as horiz
-
- vadd.s16 d1, d0, d6 @x0 = x4+x7
- vadd.s16 d3, d2, d4 @x1 = x5+x6
- vsub.s16 d7, d0, d6 @x3 = x4-x7;
- vsub.s16 d5, d2, d4 @x2 = x5-x6
-
-
-@The scaled outputs are 32 bit wide, so the 16 bit values have to be widened;
-@the disabled sequence below did that explicitly, the live code relies on vmull instead
-
- @VADDL.S16 Q12,D1,D3;x0 + x1
- @VSUBL.S16 Q14,D1,D3;x0 - x1
-
- @VSHL.S16 D8,D5,#1;
- @VSHL.S16 D9,D7,#1;
-
- @VADDL.S16 Q13,D9,D5 ; + x2
- @VSUBL.S16 Q15,D7,D8 ;x3 - U_SHIFT(x2,1,shft)
-
-@scaling follows
-
-@now we need to do the scaling; the first 8 scaling values are already in q10
-@multiply by the scaling coefficients; the 32 bit results go to q4-q7
-
- vadd.s16 d24, d3, d1 @x4 = x0 + x1
- vsub.s16 d28, d1, d3 @x6 = x0 - x1
-
- vshl.s16 d0, d7, #1 @ U_SHIFT(x3,1,shft)
- vmull.s16 q4, d24, d20 @x4*s0
-
- vshl.s16 d2, d5, #1 @ U_SHIFT(x2,1,shft)
-
- vadd.s16 d26, d0, d5 @x5 = U_SHIFT(x3,1,shft) + x2
- vmull.s16 q5, d26, d21 @x5*s1
-
- vst1.s32 {q4}, [r2], r5 @store the four 32 bit results of row 1; r2 += dst_stride * 4
-
- vld1.s16 {q10}, [r6] @load scaling coefficients 8-15
-
- vsub.s16 d30, d7, d2 @x7 = x3 - U_SHIFT(x2,1,shft)
-
- vmull.s16 q6, d28, d20 @x6*s2
- vst1.s32 {q5}, [r2], r5
-
- vmull.s16 q7, d30, d21 @x7*s3
-
-
- vst1.s32 {q6}, [r2], r5
- vst1.s32 {q7}, [r2]
-
- pop {r4-r12, pc} @restore r4-r12 and return by loading the saved lr into pc
-
-
-
-
-@*****************************************************************************
-@* Function Name : ih264_resi_trans_8x8_a9
-@* Description : Computes the 8x8 residual (src - pred), applies the H264 cf8 forward transform and multiplies each coefficient by the matching entry of g_scal_coff_h264_8x8; results are stored as 32 bit values
-@*
-@* Arguments :
-@* R0 :pointer to src buffer
-@ R1 :pointer to pred buffer
-@ R2 :pointer to dst buffer
-@ R3 :src_stride
-@ STACK :pred_stride, dst_stride (read from the first two stacked argument words)
-@*
-@*
-@* Values Returned : NONE
-@*
-@* Register Usage : r0-r6, q0-q15
-@* Stack Usage : 40 bytes (r4-r12 and lr are pushed)
-@* Cycles : Around
-@* Interruptibility : Interruptible
-@*
-@* Known Limitations
-@* \Assumptions :
-@*
-@* Revision History :
-@* DD MM YYYY Author(s) Changes
-@* 30 12 2009 100633 First version
-@*
-@*****************************************************************************
-
-
- .global ih264_resi_trans_8x8_a9
- .extern g_scal_coff_h264_8x8
-g_scal_coff_h264_8x8_addr:
- .long g_scal_coff_h264_8x8 - 8x8lbl - 8 @ offset of the table from 8x8lbl; the -8 assumes pc reads as 8x8lbl + 8 (ARM state)
-
-
-ih264_resi_trans_8x8_a9:
-
- @R0 :pointer to src buffer
- @R1 :pointer to pred buffer
- @R2 :pointer to dst buffer
- @R3 :src_stride
- @STACK :pred_stride,dst_stride
-
- push {r4-r12, lr} @save r4-r12 and lr (10 registers = 40 bytes)
- @NOTE(review): q4-q7 (d8-d15) are overwritten below without being saved; AAPCS lists d8-d15 as callee-saved - confirm
- mov r6, sp
- add r6, r6, #40 @step over the 40 bytes just pushed so that r6 points at the stacked arguments
- ldmfd r6, {r4-r5} @load the strides into registers
- @R4 pred_stride
- @R5 dst_stride
-
- @the strides are used as post-increments on the vld1/vst1 instructions below
- @NOTE(review): the original remark here said the dst stride counts 16 bit units and needs +4 then *2;
- @the code below instead shifts dst_stride left by 2 for 32 bit outputs - treat the old remark as stale
- @--------------------function loading done------------------------
-
- @let us find the residual
- @data is like 1a -> d0[1:31] d0[32:64]
- @ a b c d # # # #
- vld1.u8 d30, [r0], r3 @load 8 pixels of row 1 of src; r0 += src_stride
- vld1.u8 d31, [r1], r4 @load 8 pixels of row 1 of pred; r1 += pred_stride
-
- vld1.u8 d28, [r0], r3 @src row 2
- vld1.u8 d29, [r1], r4 @pred row 2
- vsubl.u8 q0, d30, d31 @src-pred row 1, widened to 16 bit
-
- vld1.u8 d26, [r0], r3 @src row 3
- vld1.u8 d27, [r1], r4 @pred row 3
- vsubl.u8 q1, d28, d29 @src-pred row 2
-
- vld1.u8 d24, [r0], r3 @src row 4
- vld1.u8 d25, [r1], r4 @pred row 4
- vsubl.u8 q2, d26, d27 @src-pred row 3
-
- vld1.u8 d22, [r0], r3 @src row 5
- vld1.u8 d23, [r1], r4 @pred row 5
- vsubl.u8 q3, d24, d25 @src-pred row 4
-
- vld1.u8 d20, [r0], r3 @src row 6
- vld1.u8 d21, [r1], r4 @pred row 6
- vsubl.u8 q4, d22, d23 @src-pred row 5
-
- vld1.u8 d18, [r0], r3 @src row 7
- vld1.u8 d19, [r1], r4 @pred row 7
- vsubl.u8 q5, d20, d21 @src-pred row 6
-
- vld1.u8 d16, [r0], r3 @src row 8
- vld1.u8 d17, [r1], r4 @pred row 8
- vsubl.u8 q6, d18, d19 @src-pred row 7
-
- lsl r5, r5, #2 @multiply dst stride by 4 since 32 bit values are stored
-
-
- vsubl.u8 q7, d16, d17 @src-pred row 8
-
- @after this
- @Q0 -> 1a
- @Q1 -> 2a
- @Q2 -> 3a
- @Q3 -> 4a
- @Q4 -> 5a
- @Q5 -> 6a
- @Q6 -> 7a
- @Q7 -> 8a
-
- @transpose the matrix so that we can do the horizontal transform first
-
- @transpose the inner 2x2 blocks
- vtrn.16 q0, q1
- vtrn.16 q2, q3
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- @transpose the inner 4x4 blocks
- vtrn.32 q0, q2
- vtrn.32 q1, q3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
-
- @swap the off-diagonal 4x4 quadrants to finish the 8x8 transpose
- vswp d1, d8
- vswp d7, d14
- vswp d3, d10
- vswp d5, d12
- @transpose done
-
-@at this point we will have data in Q0-Q7
-@Q7 will be populated within 2 clock cycles
-@all others are available at this clock cycle
-
- @we have loaded the residuals into the registers, now we need to add and subtract them
- @let us do the horiz transform first
-
- vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
- vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
- vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
- vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
-
- vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
- vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
- vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
- vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
-
- vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
- vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
- vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
- vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
-
- ldr r6, g_scal_coff_h264_8x8_addr @ r6 = stored offset of the scaling table
-8x8lbl:
- add r6, r6, pc @ r6 = address of the global array (stored offset + pc)
-
- vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
- vshr.s16 q8, q7, #1 @ q8 = D_SHIFT(a7,1,shft)
-
- vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
-
- vadd.s16 q2, q5, q8 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
-
-
- vshr.s16 q9, q5, #1 @ q9 = D_SHIFT(a6,1,shft)
- vsub.s16 q6, q9, q7 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
-
-@do not change Q0,Q2,Q4,Q6: they contain results
-@Q1,Q3,Q5,Q7 will receive the remaining results
-@Q8 Q9 Q10 Q11 can be used at will
-
- vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
- vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
- vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
- vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
-
- vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
- vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
- vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
- vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
- vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
-
-
- vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
- vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
- vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
- vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
-
- @------------horiz transform done-------------------------
- @results are in Q0-Q7
- @all other neon registers can be used at will
-
-@doing vertical transform
-@same computation as the horiz transform above (a few independent instructions are ordered differently)
-
- @transpose the inner 2x2 blocks
- vtrn.16 q0, q1
- vtrn.16 q2, q3
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- @transpose the inner 4x4 blocks
- vtrn.32 q0, q2
- vtrn.32 q1, q3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
-
- @swap the off-diagonal 4x4 quadrants to finish the 8x8 transpose
- vswp d1, d8
- vswp d3, d10
- vswp d5, d12
- vswp d7, d14
-
- @transpose done
-
- vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
- vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
- vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
- vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
-
- vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
- vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
- vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
- vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
-
- vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
- vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
- vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
- vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
-
-
- vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
-
- vshr.s16 q8, q7, #1 @ q8 = D_SHIFT(a7,1,shft)
- @DSHIFT_TO_0 Q8,Q7,#1,#0
- vadd.s16 q2, q5, q8 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
-
- vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
-
- vshr.s16 q9, q5, #1 @ q9 = D_SHIFT(a6,1,shft)
- vsub.s16 q6, q9, q7 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
-
-@do not change Q0,Q2,Q4,Q6: they contain results
-@Q1,Q3,Q5,Q7 will receive the remaining results
-@Q8 Q9 Q10 Q11 can be used at will
-
- vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
- vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
- vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
- vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
-
-
- vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
- vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
- vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
- vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
- vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
- vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
-
-
-@the transform results are kept as 16 bit values here; widening to 32 bit happens in the vmull scaling step below
- vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
- vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
- vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
- vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
-
- @------------vert transform done-------------------------
- @results are in Q0-Q7
- @all other neon registers can be used at will
-
- @scaling
- @since the 8x8 scaling matrix repeats in 1x4,1x4 block ,
- @we need only load 4 values for each row and in total 4 rows
- vld1.s16 {q14-q15}, [r6] @ d28-d31 = 16 scaling values, 4 per d register
-
- @since we need to get a 32 bit o/p for two 16 bit multiplications
- @we need a VMULL instruction
-@-----------------------------first and second row
-
- vmull.s16 q8, d0, d28 @scale the first row first 4 elem
- vmull.s16 q9, d28, d1 @scale the first row last 4 elem
-
- vmull.s16 q10, d2, d29 @ scale second row first 4 elem
- vmull.s16 q11, d29, d3 @scale the second row last 4 elem
- vmull.s16 q12, d4, d30 @scale third row first 4 elem
-
- vst1.s32 {q8, q9}, [r2], r5 @ write the first row complete
-
- vmull.s16 q13, d30, d5 @scale the third row last 4 elem
- vmull.s16 q8, d6, d31 @scale the fourth row first 4 elem
-
-
- vst1.s32 {q10, q11}, [r2], r5 @store the second row complete
-
-@------------------------------- remaining rows (multiplies and stores are interleaved)
-
- vmull.s16 q9, d31, d7 @scale the fourth row last 4 elem
-
- vst1.s32 {q12, q13}, [r2], r5 @store the third row complete
-
- vmull.s16 q10, d8, d28 @scale the 5th row first 4 elem
- vmull.s16 q11, d28, d9 @scale the 5th row last 4 elem
-
- vmull.s16 q12, d10, d29 @scale the 6th row first 4 elem
-
-
- vst1.s32 {q8, q9}, [r2], r5 @store the fourth row complete
-
-@--------------------------------(continued)
-
- vmull.s16 q13, d29, d11 @scale the 6th row last 4 elem
-
- vmull.s16 q8, d12, d30 @scale the 7th row first 4 elem
-
- vst1.s32 {q10, q11}, [r2], r5 @store the 5th row complete
-
- vmull.s16 q9, d30, d13 @scale the 7th row last 4 elem
- vmull.s16 q10, d14, d31 @scale the 8th row first 4 elem
-
-
- vst1.s32 {q12, q13}, [r2], r5 @store the 6th row complete
-
-@----------------------------------(continued)
- vmull.s16 q11, d31, d15 @scale the 8th row last 4 elem
-
- vst1.s32 {q8, q9}, [r2], r5 @store 7th row
- vst1.s32 {q10, q11}, [r2], r5 @store 8th row
-
-@----------------------------------done writing
-
- pop {r4-r12, pc} @restore r4-r12 and return by loading the saved lr into pc
-
-
-
-
-
-
diff --git a/common/arm/ih264_resi_trans_quant_a9.s b/common/arm/ih264_resi_trans_quant_a9.s
index caf362e..bb836bd 100755..100644
--- a/common/arm/ih264_resi_trans_quant_a9.s
+++ b/common/arm/ih264_resi_trans_quant_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@*******************************************************************************
@* @file
@* ih264_resi_trans_quant_a9.s
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
index ccae779..33859e6 100755..100644
--- a/common/arm/ih264_weighted_bi_pred_a9q.s
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_weighted_bi_pred_a9q.s
@@ -37,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_weighted_bi_pred_luma_a9q()
@@ -96,7 +96,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_bi_pred_luma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
@@ -411,7 +411,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_bi_pred_chroma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s
index 1ce94d0..81d26d4 100755..100644
--- a/common/arm/ih264_weighted_pred_a9q.s
+++ b/common/arm/ih264_weighted_pred_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_weighted_pred_a9q.s
@@ -37,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_weighted_pred_luma_a9q()
@@ -84,7 +84,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_pred_luma_a9q(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -314,7 +314,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_pred_chroma_a9q(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,