diff options
author | Venkatarama Avadhani <venkatarama.avadhani@ittiam.com> | 2015-03-11 10:08:57 +0530 |
---|---|---|
committer | Harish Mahendrakar <harish.mahendrakar@ittiam.com> | 2015-04-07 18:19:15 +0530 |
commit | aed24eee7ddfc93f1436b0c1679431bd286879b4 (patch) | |
tree | 9399f32cdfa15ac9720ded9c8a8093876ba03376 | |
parent | 839aea316dc98d258d75f7e2878b21db032a82c1 (diff) | |
download | android_external_libmpeg2-aed24eee7ddfc93f1436b0c1679431bd286879b4.tar.gz android_external_libmpeg2-aed24eee7ddfc93f1436b0c1679431bd286879b4.tar.bz2 android_external_libmpeg2-aed24eee7ddfc93f1436b0c1679431bd286879b4.zip |
Initial version
Change-Id: I5a9473876e596e7461e91f971b0243f694f7e8fb
85 files changed, 32265 insertions, 0 deletions
diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000..4668c52 --- /dev/null +++ b/Android.mk @@ -0,0 +1,6 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# decoder +include $(LOCAL_PATH)/decoder.mk + diff --git a/common/arm/impeg2_format_conv.s b/common/arm/impeg2_format_conv.s new file mode 100644 index 0000000..c07edda --- /dev/null +++ b/common/arm/impeg2_format_conv.s @@ -0,0 +1,391 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ + +@/* +@//---------------------------------------------------------------------------- +@// File Name : impeg2_format_conv.s +@// +@// Description : This file has the color format conversion routines +@// for the MPEG2 decoder on neon platform. 
+@// +@// Reference Document : +@// +@// Revision History : +@// Date Author Detail Description +@// ------------ ---------------- ---------------------------------- +@// Jul 07, 2008 Naveen Kumar T Created +@// +@//------------------------------------------------------------------------- +@*/ + +@/* +@// ---------------------------------------------------------------------------- +@// Include Files +@// ---------------------------------------------------------------------------- +@*/ +.text +.p2align 2 +.equ log2_16 , 4 +.equ log2_2 , 1 +@/* +@// ---------------------------------------------------------------------------- +@// Struct/Union Types and Define +@// ---------------------------------------------------------------------------- +@*/ + +@/* +@// ---------------------------------------------------------------------------- +@// Static Global Data section variables +@// ---------------------------------------------------------------------------- +@*/ +@//--------------------------- NONE -------------------------------------------- + +@/* +@// ---------------------------------------------------------------------------- +@// Static Prototype Functions +@// ---------------------------------------------------------------------------- +@*/ +@// -------------------------- NONE -------------------------------------------- + +@/* +@// ---------------------------------------------------------------------------- +@// Exported functions +@// ---------------------------------------------------------------------------- +@*/ + +@/***************************************************************************** +@* * +@* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q() * +@* * +@* Description : This function conversts the image from YUV420P color * +@* space to 420SP color space(UV interleaved). 
*
+@*                                                                            *
+@*  Arguments       : R0           pu1_y                                      *
+@*                    R1           pu1_u                                      *
+@*                    R2           pu1_v                                      *
+@*                    R3           pu1_dest_y                                 *
+@*                    [R13 #24]    pu1_dest_uv                                *
+@*                    [R13 #28]    u2_height                                  *
+@*                    [R13 #32]    u2_width                                   *
+@*                    [R13 #36]    u2_stridey                                 *
+@*                    [R13 #40]    u2_strideu                                 *
+@*                    [R13 #44]    u2_stridev                                 *
+@*                    [R13 #48]    u2_dest_stride_y                           *
+@*                    [R13 #52]    u2_dest_stride_uv                          *
+@*                    [R13 #56]    convert_uv_only                            *
+@*                    (offsets are relative to SP after the 24 byte push)     *
+@*  Values Returned : None                                                    *
+@*                                                                            *
+@*  Register Usage  : R0 - R8, R12, Q0                                        *
+@*                                                                            *
+@*  Stack Usage     : 24 Bytes                                                *
+@*                                                                            *
+@*  Interruptibility: Interruptible                                           *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:  Assumed to be >= 16. A trailing partial   *
+@*                                  vector is handled by an overlapping copy  *
+@*                    Image Height: Assumed to be even and non-zero.          *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+    .global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q
+impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q:
+
+    @// Save the callee-saved core registers (6 registers = 24 bytes)
+    stmfd           sp!, {r4-r8, lr}
+
+    ldr             r4, [sp, #56]               @// Load convert_uv_only
+
+    cmp             r4, #1
+    beq             yuv420sp_uv_chroma          @// convert_uv_only == 1: skip the luma copy
+    @/* Luma plane: plain copy, one 16 byte vector per inner iteration */
+    @// Load the parameters from stack
+    ldr             r4, [sp, #28]               @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]               @// Load u2_width from stack
+
+    ldr             r7, [sp, #36]               @// Load u2_stridey from stack
+
+    ldr             r8, [sp, #48]               @// Load u2_dest_stride_y from stack
+
+    sub             r7, r7, r5                  @// Source increment (stride - width)
+
+    sub             r8, r8, r5                  @// Destination increment (stride - width)
+
+
+yuv420sp_uv_row_loop_y:
+    mov             r6, r5                      @// r6 = pixels left in this row
+
+yuv420sp_uv_col_loop_y:
+    pld             [r0, #128]
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+    sub             r6, r6, #16
+    cmp             r6, #15
+    bgt             yuv420sp_uv_col_loop_y      @// loop while at least 16 pixels remain
+
+    cmp             r6, #0
+    beq             yuv420sp_uv_row_loop_end_y
+    @//Width is not a multiple of 16: step both pointers back by
+    @//(16 - remaining) bytes so that one more full 16 byte vector ends exactly
+    @//at the end of the row. Ex: for width 162 the loop above copies 160 pixels,
+    @//then bytes 146..161 are copied (146..159 are re-copied) with VLD1 and VST1
+    rsb             r6, r6, #16
+    sub             r0, r0, r6
+    sub             r3, r3, r6
+
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add             r0, r0, r7                  @// next source row
+    add             r3, r3, r8                  @// next destination row
+    subs            r4, r4, #1
+    bgt             yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+
+    ldr             r3, [sp, #24]               @// Load pu1_dest_uv from stack
+
+    ldr             r4, [sp, #28]               @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]               @// Load u2_width from stack
+
+    ldr             r7, [sp, #40]               @// Load u2_strideu from stack
+    ldr             r12, [sp, #44]              @// Load u2_stridev from stack
+
+    ldr             r8, [sp, #52]               @// Load u2_dest_stride_uv from stack
+
+    sub             r7, r7, r5, lsr #1          @// U source increment (strideu - width / 2)
+    sub             r12, r12, r5, lsr #1        @// V source increment (stridev - width / 2)
+
+    sub             r8, r8, r5                  @// Destination increment (width / 2 interleaved pairs = width bytes)
+
+    mov             r5, r5, lsr #1              @// chroma width  = width  / 2
+    mov             r4, r4, lsr #1              @// chroma height = height / 2
+yuv420sp_uv_row_loop_uv:
+    mov             r6, r5                      @// r6 = chroma samples left in this row
+
+
+yuv420sp_uv_col_loop_uv:
+    pld             [r1, #128]
+    pld             [r2, #128]
+    vld1.8          d0, [r1]!                   @// 8 U samples
+    vld1.8          d1, [r2]!                   @// 8 V samples
+    vst2.8          {d0, d1}, [r3]!             @// store interleaved as U0 V0 U1 V1 ...
+    sub             r6, r6, #8
+    cmp             r6, #7
+    bgt             yuv420sp_uv_col_loop_uv     @// loop while at least 8 samples remain
+
+    cmp             r6, #0
+    beq             yuv420sp_uv_row_loop_end_uv
+    @//Chroma width is not a multiple of 8: step U and V back by
+    @//(8 - remaining) bytes and the interleaved destination back by twice
+    @//that, so that one more full vector of 8 U and 8 V samples ends exactly
+    @//at the end of the row (pairs that were already written are re-written)
+    rsb             r6, r6, #8
+    sub             r1, r1, r6
+    sub             r2, r2, r6
+    sub             r3, r3, r6, lsl #1
+
+    vld1.8          d0, [r1]!
+    vld1.8          d1, [r2]!
+    vst2.8          {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add             r1, r1, r7                  @// next U row
+    add             r2, r2, r12                 @// next V row (uses the V stride, not the U stride)
+    add             r3, r3, r8                  @// next destination row
+    subs            r4, r4, #1
+    bgt             yuv420sp_uv_row_loop_uv
+    @//POP THE REGISTERS
+    ldmfd           sp!, {r4-r8, pc}
+
+
+
+
+
+@/*****************************************************************************
+@*                                                                            *
+@*  Function Name   : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q()            *
+@*                                                                            *
+@*  Description     : This function converts the image from YUV420P color     *
+@*                    space to 420SP color space(VU interleaved).
*
+@*                    It is identical to                                      *
+@*                    impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q except that  *
+@*                    the chroma VLD1.8 target registers are swapped (V, U)   *
+@*                                                                            *
+@*  Arguments       : R0           pu1_y                                      *
+@*                    R1           pu1_u                                      *
+@*                    R2           pu1_v                                      *
+@*                    R3           pu1_dest_y                                 *
+@*                    [R13 #24]    pu1_dest_uv                                *
+@*                    [R13 #28]    u2_height                                  *
+@*                    [R13 #32]    u2_width                                   *
+@*                    [R13 #36]    u2_stridey                                 *
+@*                    [R13 #40]    u2_strideu                                 *
+@*                    [R13 #44]    u2_stridev                                 *
+@*                    [R13 #48]    u2_dest_stride_y                           *
+@*                    [R13 #52]    u2_dest_stride_uv                          *
+@*                    [R13 #56]    convert_uv_only                            *
+@*                    (offsets are relative to SP after the 24 byte push)     *
+@*  Values Returned : None                                                    *
+@*                                                                            *
+@*  Register Usage  : R0 - R8, R12, Q0                                        *
+@*                                                                            *
+@*  Stack Usage     : 24 Bytes                                                *
+@*                                                                            *
+@*  Interruptibility: Interruptible                                           *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:  Assumed to be >= 16. A trailing partial   *
+@*                                  vector is handled by an overlapping copy  *
+@*                    Image Height: Assumed to be even and non-zero.          *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+
+    .global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q
+impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q:
+
+    @// Save the callee-saved core registers (6 registers = 24 bytes)
+    stmfd           sp!, {r4-r8, lr}
+
+    ldr             r4, [sp, #56]               @// Load convert_uv_only
+
+    cmp             r4, #1
+    beq             yuv420sp_vu_chroma          @// convert_uv_only == 1: skip the luma copy
+
+    @/* Luma plane: plain copy, one 16 byte vector per inner iteration */
+    @// Load the parameters from stack
+    ldr             r4, [sp, #28]               @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]               @// Load u2_width from stack
+
+    ldr             r7, [sp, #36]               @// Load u2_stridey from stack
+
+    ldr             r8, [sp, #48]               @// Load u2_dest_stride_y from stack
+
+    sub             r7, r7, r5                  @// Source increment (stride - width)
+
+    sub             r8, r8, r5                  @// Destination increment (stride - width)
+
+
+yuv420sp_vu_row_loop_y:
+    mov             r6, r5                      @// r6 = pixels left in this row
+
+yuv420sp_vu_col_loop_y:
+    pld             [r0, #128]
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+    sub             r6, r6, #16
+    cmp             r6, #15
+    bgt             yuv420sp_vu_col_loop_y      @// loop while at least 16 pixels remain
+
+    cmp             r6, #0
+    beq             yuv420sp_vu_row_loop_end_y
+    @//Width is not a multiple of 16: step both pointers back by
+    @//(16 - remaining) bytes so that one more full 16 byte vector ends exactly
+    @//at the end of the row. Ex: for width 162 the loop above copies 160 pixels,
+    @//then bytes 146..161 are copied (146..159 are re-copied) with VLD1 and VST1
+    rsb             r6, r6, #16
+    sub             r0, r0, r6
+    sub             r3, r3, r6
+
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+
+yuv420sp_vu_row_loop_end_y:
+    add             r0, r0, r7                  @// next source row
+    add             r3, r3, r8                  @// next destination row
+    subs            r4, r4, #1
+    bgt             yuv420sp_vu_row_loop_y
+
+yuv420sp_vu_chroma:
+
+    ldr             r3, [sp, #24]               @// Load pu1_dest_uv from stack
+
+    ldr             r4, [sp, #28]               @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]               @// Load u2_width from stack
+
+    ldr             r7, [sp, #40]               @// Load u2_strideu from stack
+    ldr             r12, [sp, #44]              @// Load u2_stridev from stack
+
+    ldr             r8, [sp, #52]               @// Load u2_dest_stride_uv from stack
+
+    sub             r7, r7, r5, lsr #1          @// U source increment (strideu - width / 2)
+    sub             r12, r12, r5, lsr #1        @// V source increment (stridev - width / 2)
+
+    sub             r8, r8, r5                  @// Destination increment (width / 2 interleaved pairs = width bytes)
+
+    mov             r5, r5, lsr #1              @// chroma width  = width  / 2
+    mov             r4, r4, lsr #1              @// chroma height = height / 2
+yuv420sp_vu_row_loop_uv:
+    mov             r6, r5                      @// r6 = chroma samples left in this row
+
+
+yuv420sp_vu_col_loop_uv:
+    pld             [r1, #128]
+    pld             [r2, #128]
+    vld1.8          d1, [r1]!                   @// 8 U samples into the second register
+    vld1.8          d0, [r2]!                   @// 8 V samples into the first register
+    vst2.8          {d0, d1}, [r3]!             @// store interleaved as V0 U0 V1 U1 ...
+    sub             r6, r6, #8
+    cmp             r6, #7
+    bgt             yuv420sp_vu_col_loop_uv     @// loop while at least 8 samples remain
+
+    cmp             r6, #0
+    beq             yuv420sp_vu_row_loop_end_uv
+    @//Chroma width is not a multiple of 8: step U and V back by
+    @//(8 - remaining) bytes and the interleaved destination back by twice
+    @//that, so that one more full vector of 8 V and 8 U samples ends exactly
+    @//at the end of the row (pairs that were already written are re-written)
+    rsb             r6, r6, #8
+    sub             r1, r1, r6
+    sub             r2, r2, r6
+    sub             r3, r3, r6, lsl #1
+
+    vld1.8          d1, [r1]!
+    vld1.8          d0, [r2]!
+    vst2.8          {d0, d1}, [r3]!
+
+yuv420sp_vu_row_loop_end_uv:
+    add             r1, r1, r7                  @// next U row
+    add             r2, r2, r12                 @// next V row (uses the V stride, not the U stride)
+    add             r3, r3, r8                  @// next destination row
+    subs            r4, r4, #1
+    bgt             yuv420sp_vu_row_loop_uv
+    @//POP THE REGISTERS
+    ldmfd           sp!, {r4-r8, pc}
+
+
+
+
+
diff --git a/common/arm/impeg2_idct.s b/common/arm/impeg2_idct.s
new file mode 100644
index 0000000..22225bf
--- /dev/null
+++ b/common/arm/impeg2_idct.s
@@ -0,0 +1,1204 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name : impeg2_idct.s
+@//
+@// Description : This file has the Idct Implementations for the
+@// MPEG2 SP decoder on neon platform.
+@// +@// Reference Document : +@// +@// Revision History : +@// Date Author Detail Description +@// ------------ ---------------- ---------------------------------- +@// Feb 22, 2008 Naveen Kumar T Created +@// +@//------------------------------------------------------------------------- +@*/ + +@/* +@// ---------------------------------------------------------------------------- +@// Include Files +@// ---------------------------------------------------------------------------- +@*/ + +.text +.p2align 2 +.equ idct_stg1_shift , 12 +.equ idct_stg2_shift , 16 +.equ idct_stg1_round , (1 << (idct_stg1_shift - 1)) +.equ idct_stg2_round , (1 << (idct_stg2_shift - 1)) +@/* +@// ---------------------------------------------------------------------------- +@// Struct/Union Types and Define +@// ---------------------------------------------------------------------------- +@*/ + +@/* +@// ---------------------------------------------------------------------------- +@// Static Global Data section variables +@// ---------------------------------------------------------------------------- +@*/ +@//--------------------------- NONE -------------------------------------------- + +@/* +@// ---------------------------------------------------------------------------- +@// Static Prototype Functions +@// ---------------------------------------------------------------------------- +@*/ +@// -------------------------- NONE -------------------------------------------- + +@/* +@// ---------------------------------------------------------------------------- +@// Exported functions +@// ---------------------------------------------------------------------------- +@*/ + + .extern gai2_impeg2_idct_q15 +.hidden gai2_impeg2_idct_q15 + .extern gai2_impeg2_idct_q11 +.hidden gai2_impeg2_idct_q11 + .extern gai2_impeg2_idct_first_col_q15 +.hidden gai2_impeg2_idct_first_col_q15 + .extern gai2_impeg2_idct_first_col_q11 +.hidden gai2_impeg2_idct_first_col_q11 + .extern 
gai2_impeg2_mismatch_stg2_additive
+.hidden gai2_impeg2_mismatch_stg2_additive
+
+@// PC relative offsets of the constant tables. Each entry is
+@// (table - label - 8): the label marks the "add rX, rX, pc" instruction
+@// that consumes the entry, and in ARM state pc reads as that
+@// instruction's address + 8.
+gai2_impeg2_idct_q15_addr1:
+    .long gai2_impeg2_idct_q15 - q15lbl1 - 8
+gai2_impeg2_idct_q15_addr2:
+    .long gai2_impeg2_idct_q15 - q15lbl2 - 8
+gai2_impeg2_idct_q11_addr1:
+    .long gai2_impeg2_idct_q11 - q11lbl1 - 8
+gai2_impeg2_idct_q11_addr2:
+    .long gai2_impeg2_idct_q11 - q11lbl2 - 8
+gai2_impeg2_idct_first_col_q15_addr1:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl1 - 8
+gai2_impeg2_idct_first_col_q15_addr2:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl2 - 8
+gai2_impeg2_idct_first_col_q15_addr3:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl3 - 8
+gai2_impeg2_mismatch_stg2_additive_addr:
+    .long gai2_impeg2_mismatch_stg2_additive - additive_lbl - 8
+gai2_impeg2_idct_first_col_q11_addr1:
+    .long gai2_impeg2_idct_first_col_q11 - fcq11_lbl1 - 8
+gai2_impeg2_idct_first_col_q11_addr2:
+    .long gai2_impeg2_idct_first_col_q11 - fcq11_lbl2 - 8
+
+@// impeg2_idct_recon_dc_a9q: DC only IDCT + reconstruction of an 8x8 block.
+@// The single coefficient pi2_src[0] is scaled through both IDCT stages and
+@// the resulting value is added to all 64 prediction pixels, saturated to
+@// 8 bit and stored to pu1_dst.
+@// Only caller-saved NEON registers (q0-q3, q8-q15) are used, so the
+@// callee-saved d8-d15 (q4-q7) are left untouched.
+    .global impeg2_idct_recon_dc_a9q
+impeg2_idct_recon_dc_a9q:
+    stmfd           sp!, {r4, r6, r12, lr}
+    @//r0: pi2_src
+    @//r1: pi2_tmp - not used, register is reused for pred_strd
+    @//r2: pu1_pred
+    @//r3: pu1_dst
+    @//r4: scratch, ends up holding the DC value added to every pixel
+    @//r6: dst_strd
+
+    ldr             r1, [sp, #20]               @//pred_strd (16 byte push + 4)
+    ldr             r6, [sp, #24]               @//dst_strd  (16 byte push + 8)
+
+    ldr             r14, gai2_impeg2_idct_q15_addr1
+q15lbl1:
+    add             r14, r14, pc
+    ldrsh           r12, [r14]                  @// first entry of gai2_impeg2_idct_q15
+    ldrsh           r4, [r0]                    @// pi2_src[0]
+
+    vld1.8          d0, [r2], r1                @// prediction row 0
+    mul             r4, r4, r12
+
+    vld1.8          d1, [r2], r1                @// prediction row 1
+    add             r4, #idct_stg1_round
+
+    vld1.8          d2, [r2], r1                @// prediction row 2
+    asr             r4, r4, #idct_stg1_shift    @// stage 1 output
+
+    ldr             r14, gai2_impeg2_idct_q11_addr1
+q11lbl1:
+    add             r14, r14, pc
+    ldrsh           r12, [r14]                  @// first entry of gai2_impeg2_idct_q11
+
+    vld1.8          d3, [r2], r1                @// prediction row 3
+    mul             r4, r4, r12
+
+    vld1.8          d4, [r2], r1                @// prediction row 4
+    add             r4, #idct_stg2_round
+
+    vld1.8          d5, [r2], r1                @// prediction row 5
+    asr             r4, r4, #idct_stg2_shift    @// stage 2 output = residue for every pixel
+
+    vld1.8          d6, [r2], r1                @// prediction row 6
+    vdup.s16        q15, r4                     @// residue replicated to 8 lanes
+
+
+    vld1.8          d7, [r2], r1                @// prediction row 7
+
+    vaddw.u8        q12, q15, d0                @// row 0: pred + residue (16 bit)
+
+    vaddw.u8        q13, q15, d1
+    vqmovun.s16     d0, q12                     @// row 0: saturate to unsigned 8 bit
+
+    vaddw.u8        q14, q15, d2
+    vqmovun.s16     d1, q13
+    vst1.8          d0, [r3], r6
+
+    vaddw.u8        q12, q15, d3                @// q12 is free again, row 0 was already narrowed
+    vqmovun.s16     d2, q14
+    vst1.8          d1, [r3], r6
+
+    vaddw.u8        q8, q15, d4
+    vqmovun.s16     d3, q12
+    vst1.8          d2, [r3], r6
+
+    vaddw.u8        q9, q15, d5
+    vqmovun.s16     d4, q8
+    vst1.8          d3, [r3], r6
+
+    vaddw.u8        q10, q15, d6
+    vqmovun.s16     d5, q9
+    vst1.8          d4, [r3], r6
+
+    vaddw.u8        q11, q15, d7
+    vqmovun.s16     d6, q10
+    vst1.8          d5, [r3], r6
+
+    vqmovun.s16     d7, q11
+    vst1.8          d6, [r3], r6
+
+
+    vst1.8          d7, [r3], r6
+
+    ldmfd           sp!, {r4, r6, r12, pc}
+
+
+
+
+@// impeg2_idct_recon_dc_mismatch_a9q: same as impeg2_idct_recon_dc_a9q, but
+@// each output position additionally gets its own 16 bit term read from
+@// gai2_impeg2_mismatch_stg2_additive (8 values per row, 8 rows) before the
+@// rounding narrow of stage 2.
+@// r0: pi2_src, r2: pu1_pred, r3: pu1_dst, r1: reused for pred_strd,
+@// r6: dst_strd. Only caller-saved NEON registers (q0, q1, q8-q11, d30)
+@// are used, so d8-d15 are left untouched.
+    .global impeg2_idct_recon_dc_mismatch_a9q
+impeg2_idct_recon_dc_mismatch_a9q:
+    stmfd           sp!, {r4-r12, lr}
+
+    ldr             r1, [sp, #44]               @//pred_strd (40 byte push + 4)
+    ldr             r6, [sp, #48]               @//dst_strd  (40 byte push + 8)
+
+    ldr             r14, gai2_impeg2_idct_q15_addr2
+q15lbl2:
+    add             r14, r14, pc
+    ldrsh           r12, [r14]                  @// first entry of gai2_impeg2_idct_q15
+    ldrsh           r4, [r0]                    @// pi2_src[0]
+
+    mul             r4, r4, r12
+    add             r4, #idct_stg1_round
+    asr             r4, r4, #idct_stg1_shift    @// stage 1 output
+
+    ldr             r14, gai2_impeg2_idct_q11_addr2
+q11lbl2:
+    add             r14, r14, pc
+    ldrsh           r12, [r14]                  @// first entry of gai2_impeg2_idct_q11
+    mul             r4, r4, r12                 @// stage 2 product, not yet rounded or shifted
+    vdup.s32        q0, r4
+
+    mov             r14, #16                    @//Increment for table read (8 x 16 bit per row)
+    ldr             r4, gai2_impeg2_mismatch_stg2_additive_addr
+additive_lbl:
+    add             r4, r4, pc
+
+    vld1.16         {q1}, [r4], r14             @// row 0: additive terms
+
+    vld1.8          d30, [r2], r1               @// row 0: prediction
+    vmovl.s16       q8, d2                      @// widen additive terms to 32 bit
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8                 @// (product + additive + 0x8000) >> 16
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30               @// add prediction
+    vqmovun.s16     d30, q11                    @// saturate to unsigned 8 bit
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 1
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 2
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 3
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 4
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 5
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 6
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+    vld1.16         {q1}, [r4], r14             @// row 7
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+
+    ldmfd           sp!, {r4-r12, pc}
+
+
+
+
+@/**
+@ *******************************************************************************
+@ *
+@ * ;brief
+@ * This function performs Inverse transform and reconstruction for 8x8
+@ * input block
+@ *
+@ * ;par Description:
+@ * Performs inverse transform and adds the prediction data and clips output
+@ * to 8 bit
+@ *
+@ * ;param[in] pi2_src
+@ * Input 8x8 coefficients
+@ *
+@ * ;param[in] pi2_tmp
+@ * Temporary 8x8 buffer for storing inverse
+@ *
+@ * transform
+@ * 1st stage output
+@ *
+@ * ;param[in] pu1_pred
+@ * Prediction 8x8 block
+@ *
+@ * ;param[out] pu1_dst
+@ * Output 8x8 block
+@ *
+@ * ;param[in] src_strd
+@ * Input stride
+@ *
+@ * ;param[in] pred_strd
+@ * Prediction stride
+@ *
+@ * ;param[in] dst_strd
+@ * Output Stride
+@ *
+@ * ;param[in] shift
+@ * Output shift
+@ *
+@ * ;param[in] zero_cols
+@ * Zero columns in pi2_src
+@ *
+@ * ;returns Void
+@ *
+@ * ;remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+
+@void impeg2_itrans_recon_8x8(WORD16 *pi2_src,
+@ WORD16 *pi2_tmp,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 pred_strd,
+@ WORD32 dst_strd,
+@ WORD32 zero_cols
+@ WORD32 zero_rows )
+
+@**************Variables Vs Registers*************************
+@ r0 => *pi2_src +@ r1 => *pi2_tmp +@ r2 => *pu1_pred +@ r3 => *pu1_dst +@ src_strd +@ pred_strd +@ dst_strd +@ zero_cols + + + + .global impeg2_idct_recon_a9q +impeg2_idct_recon_a9q: +@//Register Usage Reference - loading and Until IDCT of columns +@// Cosine Constants - D0 +@// Sine Constants - D1 +@// Row 0 First Half - D2 - y0 +@// Row 1 First Half - D6 - y1 +@// Row 2 First Half - D3 - y2 +@// Row 3 First Half - D7 - y3 +@// Row 4 First Half - D10 - y4 +@// Row 5 First Half - D14 - y5 +@// Row 6 First Half - D11 - y6 +@// Row 7 First Half - D15 - y7 + +@// Row 0 Second Half - D4 - y0 +@// Row 1 Second Half - D8 - y1 +@// Row 2 Second Half - D5 - y2 +@// Row 3 Second Half - D9 - y3 +@// Row 4 Second Half - D12 - y4 +@// Row 5 Second Half - D16 - y5 +@// Row 6 Second Half - D13 - y6 +@// Row 7 Second Half - D17 - y7 + + @// Copy the input pointer to another register + @// Step 1 : load all constants + stmfd sp!, {r4-r12, lr} + add sp, sp, #40 + ldr r8, [sp, #4] @ prediction stride + ldr r7, [sp, #8] @ destination stride + ldr r6, [sp] @ src stride + ldr r12, [sp, #12] + ldr r11, [sp, #16] + mov r6, r6, lsl #1 @ x sizeof(word16) + add r9, r0, r6, lsl #1 @ 2 rows + + add r10, r6, r6, lsl #1 @ 3 rows + + sub r10, r10, #8 @ - 4 cols * sizeof(WORD16) + sub r5, r6, #8 @ src_strd - 4 cols * sizeof(WORD16) + + + ldr r14, gai2_impeg2_idct_first_col_q15_addr1 +fcq15_lbl1: + add r14, r14, pc + vld1.16 {d0, d1}, [r14] @//D0,D1 are used for storing the constant data + + @//Step 2 Load all the input data + @//Step 3 Operate first 4 colums at a time + + and r11, r11, #0xff + and r12, r12, #0xff + + cmp r11, #0xf0 + bge skip_last4_rows + + + vld1.16 d2, [r0]! + vld1.16 d3, [r9]! + vld1.16 d4, [r0], r5 + vmull.s16 q10, d2, d0[0] @// y0 * cos4(part of c0 and c1) + vld1.16 d5, [r9], r5 + vmull.s16 q9, d3, d1[2] @// y2 * sin2 (Q3 is freed by this time)(part of d1) + vld1.16 d6, [r0]! + vld1.16 d7, [r9]! 
+ vmull.s16 q12, d6, d0[1] @// y1 * cos1(part of b0) + vld1.16 d8, [r0], r10 + vmull.s16 q13, d6, d0[3] @// y1 * cos3(part of b1) + vld1.16 d9, [r9], r10 + vmull.s16 q14, d6, d1[1] @// y1 * sin3(part of b2) + vld1.16 d10, [r0]! + vmull.s16 q15, d6, d1[3] @// y1 * sin1(part of b3) + vld1.16 d11, [r9]! + vmlal.s16 q12, d7, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vld1.16 d12, [r0], r5 + vmlsl.s16 q13, d7, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vld1.16 d13, [r9], r5 + vmlsl.s16 q14, d7, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vld1.16 d14, [r0]! + vmlsl.s16 q15, d7, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + vld1.16 d15, [r9]! + vmull.s16 q11, d10, d0[0] @// y4 * cos4(part of c0 and c1) + vld1.16 d16, [r0], r10 + vmull.s16 q3, d3, d0[2] @// y2 * cos2(part of d0) + vld1.16 d17, [r9], r10 + + @/* This following was activated when alignment is not there */ +@// VLD1.16 D2,[r0]! +@// VLD1.16 D3,[r2]! +@// VLD1.16 D4,[r0]! +@// VLD1.16 D5,[r2]! +@// VLD1.16 D6,[r0]! +@// VLD1.16 D7,[r2]! +@// VLD1.16 D8,[r0],r3 +@// VLD1.16 D9,[r2],r3 +@// VLD1.16 D10,[r0]! +@// VLD1.16 D11,[r2]! +@// VLD1.16 D12,[r0]! +@// VLD1.16 D13,[r2]! +@// VLD1.16 D14,[r0]! +@// VLD1.16 D15,[r2]! 
+@// VLD1.16 D16,[r0],r3 +@// VLD1.16 D17,[r2],r3 + + + + + vmlal.s16 q12, d14, d1[1] @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + vmlsl.s16 q13, d14, d0[1] @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + vmlal.s16 q14, d14, d1[3] @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + vmlal.s16 q15, d14, d0[3] @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + vmlsl.s16 q9, d11, d0[2] @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + vmlal.s16 q3, d11, d1[2] @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + vadd.s32 q5, q10, q11 @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + vsub.s32 q10, q10, q11 @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + vmlal.s16 q12, d15, d1[3] @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7) + vmlsl.s16 q13, d15, d1[1] @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6) + vmlal.s16 q14, d15, d0[3] @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5) + vmlsl.s16 q15, d15, d0[1] @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4) + + vadd.s32 q7, q5, q3 @// a0 = c0 + d0(part of r0,r7) + vsub.s32 q5, q5, q3 @// a3 = c0 - d0(part of r3,r4) + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + vadd.s32 q10, q7, q12 @// a0 + b0(part of r0) + vsub.s32 q3, q7, q12 @// a0 - b0(part of r7) + + vadd.s32 q12, q11, q14 @// a2 + b2(part of r2) + vsub.s32 q11, q11, q14 @// a2 - b2(part of r5) + + vadd.s32 q14, q9, q13 @// a1 + b1(part of r1) + vsub.s32 q9, q9, q13 @// a1 - b1(part of r6) + + vadd.s32 q13, q5, q15 @// a3 + b3(part of r3) + vsub.s32 q15, q5, q15 @// a3 - b3(part of r4) + + vqrshrn.s32 d2, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d15, q3, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d3, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d14, q11, #idct_stg1_shift @// r5 = 
(a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d6, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d11, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d7, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d10, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + + + b last4_cols + + + +skip_last4_rows: + + + ldr r14, gai2_impeg2_idct_first_col_q15_addr2 +fcq15_lbl2: + add r14, r14, pc + vld1.16 {d0, d1}, [r14] @//D0,D1 are used for storing the constant data + + vld1.16 d2, [r0]! + vld1.16 d3, [r9]! + vld1.16 d4, [r0], r5 + vld1.16 d5, [r9], r5 + vld1.16 d6, [r0]! + vld1.16 d7, [r9]! + vld1.16 d8, [r0], r10 + vld1.16 d9, [r9], r10 + + + + vmov.s16 q6, #0 + vmov.s16 q8, #0 + + + + + vmull.s16 q12, d6, d0[1] @// y1 * cos1(part of b0) + vmull.s16 q13, d6, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d6, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d6, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d7, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vmlsl.s16 q13, d7, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vmlsl.s16 q14, d7, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vmlsl.s16 q15, d7, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + + vmull.s16 q9, d3, d1[2] @// y2 * sin2 (Q3 is freed by this time)(part of d1) + vmull.s16 q3, d3, d0[2] @// y2 * cos2(part of d0) + + vmull.s16 q10, d2, d0[0] @// y0 * cos4(part of c0 and c1) + + + vadd.s32 q7, q10, q3 @// a0 = c0 + d0(part of r0,r7) + vsub.s32 q5, q10, q3 @// a3 = c0 - d0(part of r3,r4) + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + vadd.s32 q10, q7, q12 @// a0 + b0(part of r0) + vsub.s32 q3, q7, q12 @// a0 - b0(part of r7) + + vadd.s32 q12, q11, q14 @// a2 + b2(part of r2) + vsub.s32 q11, q11, q14 @// a2 - b2(part of r5) + + vadd.s32 q14, q9, q13 @// a1 + b1(part of r1) + vsub.s32 q9, q9, q13 @// a1 - b1(part of 
r6) + + vadd.s32 q13, q5, q15 @// a3 + b3(part of r3) + vsub.s32 q15, q5, q15 @// a3 - b3(part of r4) + + vqrshrn.s32 d2, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d15, q3, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d3, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d14, q11, #idct_stg1_shift @// r5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d6, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d11, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d7, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d10, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + + +last4_cols: + + + cmp r12, #0xf0 + bge skip_last4cols + + ldr r14, gai2_impeg2_idct_first_col_q15_addr3 +fcq15_lbl3: + add r14, r14, pc + vld1.16 {d0, d1}, [r14] @//D0,D1 are used for storing the constant data + + vmull.s16 q12, d8, d0[1] @// y1 * cos1(part of b0) + vmull.s16 q13, d8, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d8, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d8, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d9, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vmlsl.s16 q13, d9, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vmlsl.s16 q14, d9, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vmlsl.s16 q15, d9, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + + vmull.s16 q9, d5, d1[2] @// y2 * sin2 (Q4 is freed by this time)(part of d1) + vmull.s16 q4, d5, d0[2] @// y2 * cos2(part of d0) + + vmull.s16 q10, d4, d0[0] @// y0 * cos4(part of c0 and c1) + vmull.s16 q11, d12, d0[0] @// y4 * cos4(part of c0 and c1) + + vmlal.s16 q12, d16, d1[1] @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + vmlsl.s16 q13, d16, d0[1] @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + vmlal.s16 q14, d16, d1[3] @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + vmlal.s16 
q15, d16, d0[3] @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + vmlsl.s16 q9, d13, d0[2] @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + vmlal.s16 q4, d13, d1[2] @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + vadd.s32 q6, q10, q11 @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + vsub.s32 q10, q10, q11 @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + vmlal.s16 q12, d17, d1[3] @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7) + vmlsl.s16 q13, d17, d1[1] @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6) + vmlal.s16 q14, d17, d0[3] @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5) + vmlsl.s16 q15, d17, d0[1] @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4) + + vadd.s32 q8, q6, q4 @// a0 = c0 + d0(part of e0,e7) + vsub.s32 q6, q6, q4 @// a3 = c0 - d0(part of e3,e4) + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of e2,e5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of e1,e6) + + vadd.s32 q10, q8, q12 @// a0 + b0(part of e0) + vsub.s32 q4, q8, q12 @// a0 - b0(part of e7) + + vadd.s32 q12, q11, q14 @// a2 + b2(part of e2) + vsub.s32 q11, q11, q14 @// a2 - b2(part of e5) + + vadd.s32 q14, q9, q13 @// a1 + b1(part of e1) + vsub.s32 q9, q9, q13 @// a1 - b1(part of e6) + + vadd.s32 q13, q6, q15 @// a3 + b3(part of e3) + vsub.s32 q15, q6, q15 @// a3 - b3(part of r4) + + vqrshrn.s32 d4, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d17, q4, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d5, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d16, q11, #idct_stg1_shift @// r5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d8, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d13, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + vqrshrn.s32 d9, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + 
vqrshrn.s32 d12, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + b end_skip_last4cols + + + +skip_last4cols: + + + + ldr r14, gai2_impeg2_idct_first_col_q11_addr1 +fcq11_lbl1: + add r14, r14, pc + vld1.16 {d0, d1}, [r14] @//D0,D1 are used for storing the constant data + + + + vtrn.16 q1, q3 @//[r3,r1],[r2,r0] first qudrant transposing + + vtrn.16 q5, q7 @//[r7,r5],[r6,r4] third qudrant transposing + + + vtrn.32 d6, d7 @//r0,r1,r2,r3 first qudrant transposing continued..... + vtrn.32 d2, d3 @//r0,r1,r2,r3 first qudrant transposing continued..... + + vtrn.32 d10, d11 @//r4,r5,r6,r7 third qudrant transposing continued..... + vtrn.32 d14, d15 @//r4,r5,r6,r7 third qudrant transposing continued..... + + + vmull.s16 q12, d6, d0[1] @// y1 * cos1(part of b0) + vmull.s16 q13, d6, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d6, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d6, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d7, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vmlsl.s16 q13, d7, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vmlsl.s16 q14, d7, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vmlsl.s16 q15, d7, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + + vmull.s16 q10, d2, d0[0] @// y0 * cos4(part of c0 and c1) +@ VMULL.S16 Q11,D4,D0[0] ;// y4 * cos4(part of c0 and c1) + + vmull.s16 q9, d3, d1[2] @// y2 * sin2 (Q3 is freed by this time)(part of d1) + vmull.s16 q3, d3, d0[2] @// y2 * cos2(part of d0) + + + + + vsub.s32 q11, q10, q3 @// a3 = c0 - d0(part of r3,r4) + vadd.s32 q2, q10, q3 @// a0 = c0 + d0(part of r0,r7) + + + vadd.s32 q1, q2, q12 + + vsub.s32 q3, q2, q12 + + vadd.s32 q4, q11, q15 + + vsub.s32 q12, q11, q15 + + vqrshrn.s32 d5, q4, #idct_stg2_shift + vqrshrn.s32 d2, q1, #idct_stg2_shift + vqrshrn.s32 d9, q3, #idct_stg2_shift + vqrshrn.s32 d6, q12, #idct_stg2_shift + + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + + vadd.s32 q15, q11, q14 + + vsub.s32 
q12, q11, q14 + + vadd.s32 q14, q9, q13 + + vsub.s32 q11, q9, q13 + vqrshrn.s32 d4, q15, #idct_stg2_shift + vqrshrn.s32 d7, q12, #idct_stg2_shift + vqrshrn.s32 d3, q14, #idct_stg2_shift + vqrshrn.s32 d8, q11, #idct_stg2_shift + + + + + + + + + + + vmull.s16 q12, d14, d0[1] @// y1 * cos1(part of b0) + + vmull.s16 q13, d14, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d14, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d14, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d15, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vtrn.16 d2, d3 + vmlsl.s16 q13, d15, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vtrn.16 d4, d5 + vmlsl.s16 q14, d15, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vtrn.16 d6, d7 + vmlsl.s16 q15, d15, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + vtrn.16 d8, d9 + vmull.s16 q10, d10, d0[0] @// y0 * cos4(part of c0 and c1) + vtrn.32 d2, d4 + + vtrn.32 d3, d5 + vmull.s16 q9, d11, d1[2] @// y2 * sin2 (Q7 is freed by this time)(part of d1) + vtrn.32 d6, d8 + vmull.s16 q7, d11, d0[2] @// y2 * cos2(part of d0) + vtrn.32 d7, d9 + + + add r4, r2, r8, lsl #1 @ r4 = r2 + pred_strd * 2 => r4 points to 3rd row of pred data + + + add r5, r8, r8, lsl #1 @ + + + add r0, r3, r7, lsl #1 @ r0 points to 3rd row of dest data + + + add r10, r7, r7, lsl #1 @ + + + vswp d3, d6 + + + vswp d5, d8 + + + vsub.s32 q11, q10, q7 @// a3 = c0 - d0(part of r3,r4) + vadd.s32 q6, q10, q7 @// a0 = c0 + d0(part of r0,r7) + + + vadd.s32 q0, q6, q12 + + + vsub.s32 q12, q6, q12 + + + vadd.s32 q6, q11, q15 + + + vsub.s32 q7, q11, q15 + + vqrshrn.s32 d10, q0, #idct_stg2_shift + vqrshrn.s32 d17, q12, #idct_stg2_shift + vqrshrn.s32 d13, q6, #idct_stg2_shift + vqrshrn.s32 d14, q7, #idct_stg2_shift + + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + + vadd.s32 q0, q11, q14 + + + vsub.s32 q12, q11, q14 + + + vadd.s32 q14, q9, q13 + + + vsub.s32 q13, q9, q13 + vld1.8 d18, [r2], r8 + + vqrshrn.s32 d12, q0, #idct_stg2_shift + 
vld1.8 d20, [r2], r5 + + + vqrshrn.s32 d15, q12, #idct_stg2_shift + vld1.8 d19, [r2], r8 + + + + + vqrshrn.s32 d11, q14, #idct_stg2_shift + vld1.8 d22, [r4], r8 + + + + + vqrshrn.s32 d16, q13, #idct_stg2_shift + vld1.8 d21, [r2], r5 + + + b pred_buff_addition +end_skip_last4cols: + + ldr r14, gai2_impeg2_idct_first_col_q11_addr2 +fcq11_lbl2: + add r14, r14, pc + vld1.16 {d0, d1}, [r14] @//D0,D1 are used for storing the constant data + + +@/* Now the Idct of columns is done, transpose so that row idct done efficiently(step5) */ + vtrn.16 q1, q3 @//[r3,r1],[r2,r0] first qudrant transposing + vtrn.16 q2, q4 @//[r3,r1],[r2,r0] second qudrant transposing + vtrn.16 q5, q7 @//[r7,r5],[r6,r4] third qudrant transposing + vtrn.16 q6, q8 @//[r7,r5],[r6,r4] fourth qudrant transposing + + vtrn.32 d6, d7 @//r0,r1,r2,r3 first qudrant transposing continued..... + vtrn.32 d2, d3 @//r0,r1,r2,r3 first qudrant transposing continued..... + vtrn.32 d4, d5 @//r0,r1,r2,r3 second qudrant transposing continued..... + vtrn.32 d8, d9 @//r0,r1,r2,r3 second qudrant transposing continued..... + vtrn.32 d10, d11 @//r4,r5,r6,r7 third qudrant transposing continued..... + vtrn.32 d14, d15 @//r4,r5,r6,r7 third qudrant transposing continued..... + vtrn.32 d12, d13 @//r4,r5,r6,r7 fourth qudrant transposing continued..... + vtrn.32 d16, d17 @//r4,r5,r6,r7 fourth qudrant transposing continued..... 
+ + @//step6 Operate on first four rows and find their idct + @//Register Usage Reference - storing and IDCT of rows +@// Cosine Constants - D0 +@// Sine Constants - D1 +@// Element 0 First four - D2 - y0 +@// Element 1 First four - D6 - y1 +@// Element 2 First four - D3 - y2 +@// Element 3 First four - D7 - y3 +@// Element 4 First four - D4 - y4 +@// Element 5 First four - D8 - y5 +@// Element 6 First four - D5 - y6 +@// Element 7 First four - D9 - y7 +@// Element 0 Second four - D10 - y0 +@// Element 1 Second four - D14 - y1 +@// Element 2 Second four - D11 - y2 +@// Element 3 Second four - D15 - y3 +@// Element 4 Second four - D12 - y4 +@// Element 5 Second four - D16 - y5 +@// Element 6 Second four - D13 - y6 +@// Element 7 Second four - D17 - y7 + + @// Map between first kernel code seq and current +@// D2 -> D2 +@// D6 -> D6 +@// D3 -> D3 +@// D7 -> D7 +@// D10 -> D4 +@// D14 -> D8 +@// D11 -> D5 +@// D15 -> D9 +@// Q3 -> Q3 +@// Q5 -> Q2 +@// Q7 -> Q4 + + vmull.s16 q12, d6, d0[1] @// y1 * cos1(part of b0) + vmull.s16 q13, d6, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d6, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d6, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d7, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vmlsl.s16 q13, d7, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vmlsl.s16 q14, d7, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vmlsl.s16 q15, d7, d1[1] @// y1 * sin1 - y3 * sin3(part of b3) + + vmull.s16 q10, d2, d0[0] @// y0 * cos4(part of c0 and c1) + vmull.s16 q11, d4, d0[0] @// y4 * cos4(part of c0 and c1) + + vmull.s16 q9, d3, d1[2] @// y2 * sin2 (Q3 is freed by this time)(part of d1) + vmull.s16 q3, d3, d0[2] @// y2 * cos2(part of d0) + + + vmlal.s16 q12, d8, d1[1] @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + vmlsl.s16 q13, d8, d0[1] @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + vmlal.s16 q14, d8, d1[3] @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + vmlal.s16 q15, d8, d0[3] @// y1 * sin1 - y3 * sin3 + y5 * 
cos3(part of b3) + + vmlsl.s16 q9, d5, d0[2] @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + vmlal.s16 q3, d5, d1[2] @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + vadd.s32 q1, q10, q11 @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + vsub.s32 q10, q10, q11 @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + vmlal.s16 q12, d9, d1[3] @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7) + vmlsl.s16 q13, d9, d1[1] @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6) + vmlal.s16 q14, d9, d0[3] @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5) + vmlsl.s16 q15, d9, d0[1] @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4) + + vsub.s32 q11, q1, q3 @// a3 = c0 - d0(part of r3,r4) + vadd.s32 q2, q1, q3 @// a0 = c0 + d0(part of r0,r7) + + + vadd.s32 q1, q2, q12 + + vsub.s32 q3, q2, q12 + + vadd.s32 q4, q11, q15 + + vsub.s32 q12, q11, q15 + + vqrshrn.s32 d5, q4, #idct_stg2_shift + vqrshrn.s32 d2, q1, #idct_stg2_shift + vqrshrn.s32 d9, q3, #idct_stg2_shift + vqrshrn.s32 d6, q12, #idct_stg2_shift + + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + + vadd.s32 q15, q11, q14 + + vsub.s32 q12, q11, q14 + + vadd.s32 q14, q9, q13 + + vsub.s32 q11, q9, q13 + vqrshrn.s32 d4, q15, #idct_stg2_shift + vqrshrn.s32 d7, q12, #idct_stg2_shift + vqrshrn.s32 d3, q14, #idct_stg2_shift + vqrshrn.s32 d8, q11, #idct_stg2_shift + + + + + + + + + + + vmull.s16 q12, d14, d0[1] @// y1 * cos1(part of b0) + + vmull.s16 q13, d14, d0[3] @// y1 * cos3(part of b1) + vmull.s16 q14, d14, d1[1] @// y1 * sin3(part of b2) + vmull.s16 q15, d14, d1[3] @// y1 * sin1(part of b3) + + vmlal.s16 q12, d15, d0[3] @// y1 * cos1 + y3 * cos3(part of b0) + vtrn.16 d2, d3 + vmlsl.s16 q13, d15, d1[3] @// y1 * cos3 - y3 * sin1(part of b1) + vtrn.16 d4, d5 + vmlsl.s16 q14, d15, d0[1] @// y1 * sin3 - y3 * cos1(part of b2) + vtrn.16 d6, d7 + vmlsl.s16 q15, d15, d1[1] @// y1 * sin1 
- y3 * sin3(part of b3) + vtrn.16 d8, d9 + vmull.s16 q10, d10, d0[0] @// y0 * cos4(part of c0 and c1) + vtrn.32 d2, d4 + vmull.s16 q11, d12, d0[0] @// y4 * cos4(part of c0 and c1) + vtrn.32 d3, d5 + vmull.s16 q9, d11, d1[2] @// y2 * sin2 (Q7 is freed by this time)(part of d1) + vtrn.32 d6, d8 + vmull.s16 q7, d11, d0[2] @// y2 * cos2(part of d0) + vtrn.32 d7, d9 + vmlal.s16 q12, d16, d1[1] @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + + add r4, r2, r8, lsl #1 @ r4 = r2 + pred_strd * 2 => r4 points to 3rd row of pred data + vmlsl.s16 q13, d16, d0[1] @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + + add r5, r8, r8, lsl #1 @ + vmlal.s16 q14, d16, d1[3] @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + + add r0, r3, r7, lsl #1 @ r0 points to 3rd row of dest data + vmlal.s16 q15, d16, d0[3] @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + add r10, r7, r7, lsl #1 @ + vmlsl.s16 q9, d13, d0[2] @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + + + vmlal.s16 q7, d13, d1[2] @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + vadd.s32 q6, q10, q11 @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + vsub.s32 q10, q10, q11 @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + vmlal.s16 q12, d17, d1[3] @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7) + vswp d3, d6 + vmlsl.s16 q13, d17, d1[1] @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6) + + vswp d5, d8 + vmlal.s16 q14, d17, d0[3] @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5) + vmlsl.s16 q15, d17, d0[1] @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4) + + vsub.s32 q11, q6, q7 @// a3 = c0 - d0(part of r3,r4) + vadd.s32 q6, q6, q7 @// a0 = c0 + d0(part of r0,r7) + + + vadd.s32 q0, q6, q12 + + + vsub.s32 q12, q6, q12 + + + vadd.s32 q6, q11, q15 + + + vsub.s32 q7, q11, q15 + + vqrshrn.s32 d10, q0, #idct_stg2_shift + vqrshrn.s32 d17, q12, #idct_stg2_shift + vqrshrn.s32 d13, q6, #idct_stg2_shift + vqrshrn.s32 d14, q7, 
#idct_stg2_shift + + vsub.s32 q11, q10, q9 @// a2 = c1 - d1(part of r2,r5) + vadd.s32 q9, q10, q9 @// a1 = c1 + d1(part of r1,r6) + + + vadd.s32 q0, q11, q14 + + + vsub.s32 q12, q11, q14 + + + vadd.s32 q14, q9, q13 + + + vsub.s32 q13, q9, q13 + vld1.8 d18, [r2], r8 + + vqrshrn.s32 d12, q0, #idct_stg2_shift + vld1.8 d20, [r2], r5 + + + vqrshrn.s32 d15, q12, #idct_stg2_shift + vld1.8 d19, [r2], r8 + + + + + vqrshrn.s32 d11, q14, #idct_stg2_shift + vld1.8 d22, [r4], r8 + + + + + vqrshrn.s32 d16, q13, #idct_stg2_shift + vld1.8 d21, [r2], r5 + + + + +pred_buff_addition: + + + vtrn.16 d10, d11 + vld1.8 d24, [r4], r5 + + vtrn.16 d12, d13 + vld1.8 d23, [r4], r8 + + vaddw.u8 q1, q1, d18 + vld1.8 d25, [r4], r5 + + vtrn.16 d14, d15 + vaddw.u8 q2, q2, d22 + + vtrn.16 d16, d17 + vaddw.u8 q3, q3, d20 + + vtrn.32 d10, d12 + vaddw.u8 q4, q4, d24 + + vtrn.32 d11, d13 + vtrn.32 d14, d16 + vtrn.32 d15, d17 + + vswp d11, d14 + vswp d13, d16 + +@ Row values stored in the q register. + +@Q1 :r0 +@Q3: r1 +@Q2: r2 +@Q4: r3 +@Q5: r4 +@Q7: r5 +@Q6: r6 +@Q8: r7 + + + +@/// Adding the prediction buffer + + + + + + + + + + @ Load prediction data + + + + + + @Adding recon with prediction + + + + + + vaddw.u8 q5, q5, d19 + vqmovun.s16 d2, q1 + vaddw.u8 q7, q7, d21 + vqmovun.s16 d4, q2 + vaddw.u8 q6, q6, d23 + vqmovun.s16 d6, q3 + vaddw.u8 q8, q8, d25 + vqmovun.s16 d8, q4 + + + + + + + + vst1.8 {d2}, [r3], r7 + vqmovun.s16 d10, q5 + vst1.8 {d6}, [r3], r10 + vqmovun.s16 d14, q7 + vst1.8 {d4}, [r0], r7 + vqmovun.s16 d12, q6 + vst1.8 {d8}, [r0], r10 + vqmovun.s16 d16, q8 + + + + + + + + vst1.8 {d10}, [r3], r7 + vst1.8 {d14}, [r3], r10 + vst1.8 {d12}, [r0], r7 + vst1.8 {d16}, [r0], r10 + + + + + sub sp, sp, #40 + ldmfd sp!, {r4-r12, pc} + + + diff --git a/common/arm/impeg2_inter_pred.s b/common/arm/impeg2_inter_pred.s new file mode 100644 index 0000000..f1b3dde --- /dev/null +++ b/common/arm/impeg2_inter_pred.s @@ -0,0 +1,801 @@ 
+@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ + +@/* +@//---------------------------------------------------------------------------- +@// File Name : impeg2_inter_pred.s +@// +@// Description : This file has motion compensation related +@// interpolation functions on Neon + CortexA-8 platform +@// +@// Reference Document : +@// +@// Revision History : +@// Date Author Detail Description +@// ------------ ---------------- ---------------------------------- +@// 18 jun 2010 S Hamsalekha Created +@// +@//------------------------------------------------------------------------- +@*/ + +@/* +@// ---------------------------------------------------------------------------- +@// Include Files +@// ---------------------------------------------------------------------------- +@*/ +.text +.p2align 2 + + +@/* +@// ---------------------------------------------------------------------------- +@// Struct/Union Types and Define +@// ---------------------------------------------------------------------------- +@*/ + + +@/* +@// ---------------------------------------------------------------------------- +@// Static Global Data section variables +@// 
---------------------------------------------------------------------------- +@*/ +@// -------------------------- NONE -------------------------------------------- + + +@/* +@// ---------------------------------------------------------------------------- +@// Static Prototype Functions +@// ---------------------------------------------------------------------------- +@*/ +@// -------------------------- NONE -------------------------------------------- + +@/* +@// ---------------------------------------------------------------------------- +@// Exported functions +@// ---------------------------------------------------------------------------- +@*/ + +@//--------------------------------------------------------------------------- +@// Function Name : impeg2_copy_mb_a9q() +@// +@// Detail Description : Copies one MB worth of data from src to the dst: +@// 16 rows of 16 bytes for y, then 8 rows of 8 bytes +@// each for u and v +@// +@// Inputs : r0 - pointer to src (y, u, v plane pointers are read from offsets 0, 4, 8) +@// r1 - pointer to dst (y, u, v plane pointers are read from offsets 0, 4, 8) +@// r2 - source width (used as the y row stride, halved for u and v) +@// r3 - destination width (used as the y row stride, halved for u and v) +@// Registers Used : r4, r5, d0, d1 +@// +@// Stack Usage : 12 bytes +@// +@// Outputs : The copied MB in the dst y, u and v planes +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + + + .global impeg2_copy_mb_a9q + + +impeg2_copy_mb_a9q: + + stmfd r13!, {r4, r5, r14} + + + ldr r4, [r0] @src->y + ldr r5, [r1] @dst->y + @Read one row of data from the src + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + + @//Repeat 15 times for y + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and 
increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + vld1.8 {d0, d1}, [r4], r2 @Load and increment src + vst1.8 {d0, d1}, [r5], r3 @Store and increment dst + + mov r2, r2, lsr #1 @src_offset /= 2 (u and v rows use half the y stride) + mov r3, r3, lsr #1 @dst_offset /= 2 (u and v rows use half the y stride) + + ldr r4, [r0, #4] @src->u + ldr r5, [r1, #4] @dst->u + @Read one row of data from the src + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + + @//Repeat 7 times for u + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + 
vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + + ldr r4, [r0, #8] @src->v + ldr r5, [r1, #8] @dst->v + @Read one row of data from the src + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + + @//Repeat 7 times for v + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + vld1.8 {d0}, [r4], r2 @Load and increment src + vst1.8 {d0}, [r5], r3 @Store and increment dst + + ldmfd r13!, {r4, r5, pc} + + + + +@/* +@//--------------------------------------------------------------------------- +@// Function Name : impeg2_mc_fullx_halfy_8x8_a9q() +@// +@// Detail Description : This function pastes the reference block in the +@// current frame buffer. This function is called for +@// blocks that are not coded and have motion vectors +@// with a half pel resolution in y (vertical) only. 
+@// +@// Inputs : r0 - out : Current Block Pointer +@// r1 - ref : Reference Block Pointer +@// r2 - ref_wid : Reference Block Width +@// r3 - out_wid : Current Block Width +@// +@// Registers Used : r14, D0-D9 (D8-D9 are saved and restored) +@// +@// Stack Usage : 20 bytes (4 for r14 + 16 for D8-D9) +@// +@// Outputs : The Motion Compensated Block +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + .global impeg2_mc_fullx_halfy_8x8_a9q + +impeg2_mc_fullx_halfy_8x8_a9q: + + stmfd r13!, {r14} + vpush {d8, d9} @// d8-d9 are callee-saved (AAPCS) and are used below + add r14, r1, r2 + mov r2, r2, lsl #1 + +@/* Load 8 + 1 rows from reference block */ +@/* Each output row is the rounded average ((a + b + 1) >> 1) of two vertically adjacent rows, done by vrhadd */ + vld1.8 {d0}, [r1], r2 @// first row hence r1 = D0 + vld1.8 {d2}, [r14], r2 @// second row hence r2 = D2 + vld1.8 {d4}, [r1], r2 @// third row hence r3 = D4 + vld1.8 {d6}, [r14], r2 @// fourth row hence r4 = D6 + vld1.8 {d1}, [r1], r2 @// fifth row hence r5 = D1 + vld1.8 {d3}, [r14], r2 @// sixth row hence r6 = D3 + vrhadd.u8 d9, d1, d6 @// estimated row 4 = D9 (uses ref row 5 in D1 before Q0 is overwritten) + vld1.8 {d5}, [r1], r2 @// seventh row hence r7 = D5 + vrhadd.u8 q0, q0, q1 @// estimated row 1 = D0, row 5 = D1 + vld1.8 {d7}, [r14], r2 @// eighth row hence r8 = D7 + vrhadd.u8 q1, q1, q2 @// estimated row 2 = D2, row 6 = D3 + vld1.8 {d8}, [r1], r2 @// ninth row hence r9 = D8 + vrhadd.u8 q2, q2, q3 @// estimated row 3 = D4, row 7 = D5 + + add r14, r0, r3 + mov r3, r3, lsl #1 + +@/* Store the eight rows calculated above */ + vst1.8 {d2}, [r14], r3 @// second row hence D2 + vrhadd.u8 d7, d7, d8 @// estimated row 8 = D7 + vst1.8 {d0}, [r0], r3 @// first row hence D0 + vst1.8 {d9}, [r14], r3 @// fourth row hence D9 + vst1.8 {d4}, [r0], r3 @// third row hence D4 + vst1.8 {d3}, [r14], r3 @// sixth row hence r6 = D3 + vst1.8 {d1}, [r0], r3 @// fifth row hence r5 = D1 + vst1.8 {d7}, [r14], r3 @// eighth row hence r8 = D7 + vst1.8 {d5}, [r0], r3 @// seventh row hence r7 = D5 + + vpop {d8, d9} @// restore callee-saved registers after the last store that reads them + ldmfd sp!, {pc} + + + + + + +@/* 
+@//--------------------------------------------------------------------------- +@// Function Name : impeg2_mc_halfx_fully_8x8_a9q() +@// +@// Detail Description : This function pastes the reference block in the +@// current frame buffer. This function is called for +@// blocks that are not coded and have motion vectors +@// with a half pel resolution in x (horizontal) only. +@// Each output pixel is the rounded average of two +@// horizontally adjacent reference pixels. +@// +@// Inputs : r0 - out : Current Block Pointer +@// r1 - ref : Reference Block Pointer +@// r2 - ref_wid : Reference Block Width +@// r3 - out_wid : Current Block Width +@// +@// Registers Used : r12, r14, d0-d10, d12-d14, d16-d18, d20-d22 +@// (d8-d14 are saved and restored) + +@// +@// Stack Usage : 64 bytes (8 for r12, lr + 56 for d8-d14) +@// +@// Outputs : The Motion Compensated Block +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + + + .global impeg2_mc_halfx_fully_8x8_a9q + + + +impeg2_mc_halfx_fully_8x8_a9q: + + stmfd sp!, {r12, lr} + + vpush {d8-d14} @d8-d14 are callee-saved (AAPCS) and are used below + + add r14, r1, r2, lsl #2 + + add r12, r0, r3, lsl#2 + + vld1.8 {d0, d1}, [r1], r2 @load 16 pixels of row1 + + vld1.8 {d2, d3}, [r14], r2 @load 16 pixels of row5 + + + vld1.8 {d4, d5}, [r1], r2 @load 16 pixels row2 + + vld1.8 {d6, d7}, [r14], r2 @load 16 pixels of row6 + + + vext.8 d8, d0, d1, #1 @Extract pixels (1-8) of row1 + + vext.8 d12, d2, d3, #1 @Extract pixels (1-8) of row5 + + vext.8 d16, d4, d5, #1 @Extract pixels (1-8) of row2 + + vext.8 d20, d6, d7, #1 @Extract pixels (1-8) of row6 + + + vld1.8 {d9, d10}, [r1], r2 @load row3 + + vld1.8 {d13, d14}, [r14], r2 @load row7 + + vld1.8 {d17, d18}, [r1], r2 @load row4 + + vld1.8 {d21, d22}, [r14], r2 @load row8 + + + vext.8 d1, d9, d10, #1 @Extract pixels (1-8) of row3 + + vext.8 d3, d13, d14, #1 @Extract pixels (1-8) of row7 + + + + vext.8 d5, d17, d18, #1 @Extract pixels (1-8) of row4 + + vext.8 d7, d21, d22, #1 @Extract pixels (1-8) of row8 + + + vrhadd.u8 q0, q0, q4 @operate on row1 and row3 + + vrhadd.u8 q1, q1, q6 @operate on row5 and row7 + + + vrhadd.u8 q2, q2, q8 @operate 
on row2 and row4 + + + + vrhadd.u8 q3, q3, q10 @operate on row6 and row8 + + vst1.8 d0, [r0], r3 @store row1 + + vst1.8 d2, [r12], r3 @store row5 + + vst1.8 d4, [r0], r3 @store row2 + + vst1.8 d6, [r12], r3 @store row6 + + vst1.8 d1, [r0], r3 @store row3 + + vst1.8 d3, [r12], r3 @store row7 + + vst1.8 d5, [r0], r3 @store row4 + + vst1.8 d7, [r12], r3 @store row8 + + + + vpop {d8-d14} @restore callee-saved registers + + ldmfd sp!, {r12, pc} + + + + + + + + +@/* +@//--------------------------------------------------------------------------- +@// Function Name : impeg2_mc_halfx_halfy_8x8_a9q() +@// +@// Detail Description : This function pastes the reference block in the +@// current frame buffer. This function is called for +@// blocks that are not coded and have motion vectors +@// with a half pel resolution in both x and y. +@// Each output pixel is (a + b + c + d + 2) >> 2 over +@// the 2x2 neighbourhood of reference pixels. +@// +@// Inputs : r0 - out : Current Block Pointer +@// r1 - ref : Reference Block Pointer +@// r2 - ref_wid : Reference Block Width +@// r3 - out_wid : Current Block Width +@// +@// Registers Used : r14, q0-q15 (d8-d15 are saved and restored) + +@// +@// Stack Usage : 68 bytes (4 for r14 + 64 for d8-d15) +@// +@// Outputs : The Motion Compensated Block +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + + .global impeg2_mc_halfx_halfy_8x8_a9q + +impeg2_mc_halfx_halfy_8x8_a9q: + + stmfd sp!, {r14} + + vpush {d8-d15} @d8-d15 (q4-q7) are callee-saved (AAPCS) and are used below + + add r14, r1, r2, lsl #2 + + vld1.8 {d0, d1}, [r1], r2 @load 16 pixels of row1 + + vld1.8 {d2, d3}, [r14], r2 @load 16 pixels of row5 + + vld1.8 {d4, d5}, [r1], r2 @load 16 pixels row2 + + vld1.8 {d6, d7}, [r14], r2 @load 16 pixels of row6 + + vext.8 d1, d0, d1, #1 @Extract pixels (1-8) of row1 + + + + vext.8 d3, d2, d3, #1 @Extract pixels (1-8) of row5 + + + + vext.8 d5, d4, d5, #1 @Extract pixels (1-8) of row2 + + vext.8 d7, d6, d7, #1 @Extract pixels (1-8) of row6 + + + + + vld1.8 {d8, d9}, [r1], r2 @load row3 + + + + vld1.8 {d10, d11}, [r14], r2 @load row7 + + vld1.8 {d12, d13}, [r1], r2 @load row4 + + vld1.8 {d14, d15}, [r14], r2 @load row8 + + vext.8 d9, d8, d9, 
#1 @Extract pixels (1-8) of row3 + + vld1.8 {d16, d17}, [r14], r2 @load row9 + + + + + + vext.8 d11, d10, d11, #1 @Extract pixels (1-8) of row7 + + + + vext.8 d13, d12, d13, #1 @Extract pixels (1-8) of row4 + + + + vext.8 d15, d14, d15, #1 @Extract pixels (1-8) of row8 + + vext.8 d17, d16, d17, #1 @Extract pixels (1-8) of row9 + + + @interpolation in x direction (widen to 16 bit: pixel[x] + pixel[x + 1]) + + vaddl.u8 q0, d0, d1 @operate row1 + + vaddl.u8 q1, d2, d3 @operate row5 + + vaddl.u8 q2, d4, d5 @operate row2 + + vaddl.u8 q3, d6, d7 @operate row6 + + vaddl.u8 q4, d8, d9 @operate row3 + + vaddl.u8 q5, d10, d11 @operate row7 + + vaddl.u8 q6, d12, d13 @operate row4 + + vaddl.u8 q7, d14, d15 @operate row8 + + vaddl.u8 q8, d16, d17 @operate row9 + + @interpolation in y direction (sum adjacent rows, then rounding shift right by 2) + + add r14, r0, r3, lsl #2 + + + + vadd.u16 q9, q0, q2 @operate row1 and row2 + + vadd.u16 q13, q1, q3 @operate row5 and row6 + + vadd.u16 q10, q2, q4 @operate row2 and row3 + + vadd.u16 q14, q3, q5 @operate row6 and row7 + + vrshrn.u16 d18, q9, #2 @row1 + + vrshrn.u16 d26, q13, #2 @row5 + + vrshrn.u16 d20, q10, #2 @row2 + + vrshrn.u16 d28, q14, #2 @row6 + + vadd.u16 q11, q4, q6 @operate row3 and row4 + + vst1.8 d18, [r0], r3 @store row1 + + vadd.u16 q15, q5, q7 @operate row7 and row8 + + vst1.8 d26, [r14], r3 @store row5 + + vadd.u16 q12, q6, q1 @operate row4 and row5 + + vst1.8 d20, [r0], r3 @store row2 + + vadd.u16 q7, q7, q8 @operate row8 and row9 + + vst1.8 d28, [r14], r3 @store row6 + + + + vrshrn.u16 d22, q11, #2 @row3 + + vrshrn.u16 d30, q15, #2 @row7 + + vrshrn.u16 d24, q12, #2 @row4 + + vrshrn.u16 d14, q7, #2 @row8 + + + vst1.8 d22, [r0], r3 @store row3 + vst1.8 d30, [r14], r3 @store row7 + vst1.8 d24, [r0], r3 @store row4 + vst1.8 d14, [r14], r3 @store row8 + + + + vpop {d8-d15} @restore callee-saved registers only after d14 (row8) has been stored + + ldmfd sp!, {pc} + + + + + +@/* +@//--------------------------------------------------------------------------- +@// Function Name : impeg2_mc_fullx_fully_8x8_a9q() +@// +@// Detail Description : This function pastes the reference block in the +@// current 
frame buffer. This function is called for +@// blocks that are not coded and have motion vectors +@// with a full pel resolution (plain 8x8 copy, no interpolation). +@// +@// Inputs : r0 - out : Current Block Pointer +@// r1 - ref : Reference Block Pointer +@// r2 - ref_wid : Reference Block Width +@// r3 - out_wid : Current Block Width +@// +@// Registers Used : r12, r14, d0-d3 + +@// +@// Stack Usage : 8 bytes +@// +@// Outputs : The Motion Compensated Block +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + + .global impeg2_mc_fullx_fully_8x8_a9q +impeg2_mc_fullx_fully_8x8_a9q: + + + stmfd sp!, {r12, lr} + + add r14, r1, r2, lsl #2 + + add r12, r0, r3, lsl #2 + + + vld1.8 d0, [r1], r2 @load row1 + + vld1.8 d1, [r14], r2 @load row5 (r14 starts 4 rows below r1) + + vld1.8 d2, [r1], r2 @load row2 + + vld1.8 d3, [r14], r2 @load row6 + + + vst1.8 d0, [r0], r3 @store row1 + + vst1.8 d1, [r12], r3 @store row5 (r12 starts 4 rows below r0) + + vst1.8 d2, [r0], r3 @store row2 + + vst1.8 d3, [r12], r3 @store row6 + + + vld1.8 d0, [r1], r2 @load row3 + + vld1.8 d1, [r14], r2 @load row7 + + vld1.8 d2, [r1], r2 @load row4 + + vld1.8 d3, [r14], r2 @load row8 + + + vst1.8 d0, [r0], r3 @store row3 + + vst1.8 d1, [r12], r3 @store row7 + + vst1.8 d2, [r0], r3 @store row4 + + vst1.8 d3, [r12], r3 @store row8 + + + ldmfd sp!, {r12, pc} + + + + + +@/* +@//--------------------------------------------------------------------------- +@// Function Name : impeg2_interpolate_a9q() +@// +@// Detail Description : averages (with rounding) two source buffers into the dest buffer +@// +@// Inputs : r0 - pointer to src1 (y, u, v pointers are read from offsets 0, 4, 8) +@// r1 - pointer to src2 (y, u, v pointers are read from offsets 0, 4, 8) +@// r2 - dest buf (y, u, v pointers are read from offsets 0, 4, 8) +@// r3 - dst stride +@// Registers Used : r4, r5, r7, r12, r14, d0-d15 +@// +@// Stack Usage : 20 bytes +@// +@// Outputs : The Motion Compensated Block +@// +@// Return Data : None +@// +@// Programming Note : <program limitation> +@//----------------------------------------------------------------------------- +@*/ + + + .global 
impeg2_interpolate_a9q + + +impeg2_interpolate_a9q: + + stmfd r13!, {r4, r5, r7, r12, r14} + + vpush {d8-d15} @d8-d15 are callee-saved (AAPCS) and are used below (64 more bytes of stack) + + ldr r4, [r0, #0] @ptr_y src1 + + ldr r5, [r1, #0] @ptr_y src2 + + ldr r7, [r2, #0] @ptr_y dst buf + + mov r12, #4 @counter for number of blocks (4 iterations x 4 rows = 16 luma rows) + + +interp_lumablocks_stride: + + vld1.8 {d0, d1}, [r4]! @row1 src1 (sources are read contiguously, 16 bytes per row) + + vld1.8 {d2, d3}, [r4]! @row2 src1 + + vld1.8 {d4, d5}, [r4]! @row3 src1 + + vld1.8 {d6, d7}, [r4]! @row4 src1 + + + vld1.8 {d8, d9}, [r5]! @row1 src2 + + vld1.8 {d10, d11}, [r5]! @row2 src2 + + vld1.8 {d12, d13}, [r5]! @row3 src2 + + vld1.8 {d14, d15}, [r5]! @row4 src2 + + + + + vrhadd.u8 q0, q0, q4 @operate on row1: (src1 + src2 + 1) >> 1 + + vrhadd.u8 q1, q1, q5 @operate on row2 + + vrhadd.u8 q2, q2, q6 @operate on row3 + + vrhadd.u8 q3, q3, q7 @operate on row4 + + + + vst1.8 {d0, d1}, [r7], r3 @row1 + + vst1.8 {d2, d3}, [r7], r3 @row2 + + vst1.8 {d4, d5}, [r7], r3 @row3 + + vst1.8 {d6, d7}, [r7], r3 @row4 + + subs r12, r12, #1 + + bne interp_lumablocks_stride + + + mov r3, r3, lsr #1 @stride >> 1 + + ldr r4, [r0, #4] @ptr_u src1 + + ldr r5, [r1, #4] @ptr_u src2 + + ldr r7 , [r2, #4] @ptr_u dst buf + + mov r12, #2 @counter for number of blocks (first pass is u, second pass is v) + + + +@chroma blocks + +interp_chromablocks_stride: + + vld1.8 {d0, d1}, [r4]! @row1 & 2 src1 + + vld1.8 {d2, d3}, [r4]! @row3 & 4 src1 + + vld1.8 {d4, d5}, [r4]! @row5 & 6 src1 + + vld1.8 {d6, d7}, [r4]! @row7 & 8 src1 + + + vld1.8 {d8, d9}, [r5]! @row1 & 2 src2 + + vld1.8 {d10, d11}, [r5]! @row3 & 4 src2 + + vld1.8 {d12, d13}, [r5]! @row5 & 6 src2 + + vld1.8 {d14, d15}, [r5]! 
@row7 & 8 src2 + + + + + vrhadd.u8 q0, q0, q4 @operate on row1 & 2 + + vrhadd.u8 q1, q1, q5 @operate on row3 & 4 + + vrhadd.u8 q2, q2, q6 @operate on row5 & 6 + + vrhadd.u8 q3, q3, q7 @operate on row7 & 8 + + + vst1.8 {d0}, [r7], r3 @row1 + + vst1.8 {d1}, [r7], r3 @row2 + + vst1.8 {d2}, [r7], r3 @row3 + + vst1.8 {d3}, [r7], r3 @row4 + + vst1.8 {d4}, [r7], r3 @row5 + + vst1.8 {d5}, [r7], r3 @row6 + + vst1.8 {d6}, [r7], r3 @row7 + + vst1.8 {d7}, [r7], r3 @row8 + + + + ldr r4, [r0, #8] @ptr_v src1 (set up for the second pass) + + ldr r5, [r1, #8] @ptr_v src2 + + ldr r7, [r2, #8] @ptr_v dst buf + + subs r12, r12, #1 + + bne interp_chromablocks_stride + + + vpop {d8-d15} @restore callee-saved registers + + ldmfd r13!, {r4, r5, r7, r12, pc} + + + + + diff --git a/common/arm/impeg2_mem_func.s b/common/arm/impeg2_mem_func.s new file mode 100755 index 0000000..869b7d7 --- /dev/null +++ b/common/arm/impeg2_mem_func.s @@ -0,0 +1,177 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name : impeg2_mem_func.s
+@//
+@// Description : This file has block memset helper functions
+@// for the Neon + CortexA-8 platform
+@//
+@// Reference Document :
+@//
+@// Revision History :
+@// Date Author Detail Description
+@// ------------ ---------------- ----------------------------------
+@// 18 jun 2010 S Hamsalekha Created
+@//
+@//-------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Include Files
+@// ----------------------------------------------------------------------------
+@*/
+.text
+.p2align 2
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Struct/Union Types and Define
+@// ----------------------------------------------------------------------------
+@*/
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Global Data section variables
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Prototype Functions
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Exported functions
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : impeg2_memset_8bit_8x8_block_a9q()
+@//
+@// Detail Description : This routine initialises the Block matrix buffer contents to a
+@// particular Value. This function also assumes the buffer size
+@// to be set is 64 Bytes fixed (8 rows of 8 bytes). It also assumes that blk matrix
+@// used is 64 bit aligned.
+@//
+@// Inputs : r0: pi2_blk_mat : Block Pointer
+@// r1: u2_val : Value with which the block is initialized (low 8 bits are replicated)
+@// r2: u4_dst_width: Destination Width (added to r0 after every row)
+@//
+@// Registers Used : d0
+@//
+@// Stack Usage : 4 bytes
+@//
+@// Outputs : Block Matrix Initialized to given value
+@//
+@// Return Data : None
+@//
+@// Programming Note : None
+@//-----------------------------------------------------------------------------
+@*/
+    .global impeg2_memset_8bit_8x8_block_a9q
+impeg2_memset_8bit_8x8_block_a9q:
+    str lr, [sp, #-4]! @//push the return address
+
+    vdup.8 d0, r1 @//r1 is the 8-bit value to be set into all 8 lanes of d0
+
+    vst1.8 {d0}, [r0], r2 @//Store the row 1
+    vst1.8 {d0}, [r0], r2 @//Store the row 2
+    vst1.8 {d0}, [r0], r2 @//Store the row 3
+    vst1.8 {d0}, [r0], r2 @//Store the row 4
+    vst1.8 {d0}, [r0], r2 @//Store the row 5
+    vst1.8 {d0}, [r0], r2 @//Store the row 6
+    vst1.8 {d0}, [r0], r2 @//Store the row 7
+    vst1.8 {d0}, [r0], r2 @//Store the row 8
+
+    ldr pc, [sp], #4 @//pop the return address straight into pc
+
+
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : impeg2_memset0_16bit_8x8_linear_block_a9q()
+@//
+@// Detail Description : memsets 128 byte long linear buf to 0
+@// (8 stores of 16 bytes each, r0 post-incremented)
+@//
+@// Inputs : r0 - Buffer
+@// Registers Used : q0
+
+@//
+@// Stack Usage : 4 bytes
+@//
+@// Outputs : None
+@//
+@// Return Data : None
+@//
+@// Programming Note : <program limitation>
+@//-----------------------------------------------------------------------------
+@*/
+
+
+
+    .global impeg2_memset0_16bit_8x8_linear_block_a9q
+
+
+impeg2_memset0_16bit_8x8_linear_block_a9q:
+
+    stmfd r13!, {r14}
+
+    vmov.i16 q0, #0
+
+@one 16-byte store per row of eight 16-bit zeros
+
+    vst1.16 {d0, d1} , [r0]! @row1
+
+    vst1.16 {d0, d1} , [r0]! @row2
+
+    vst1.16 {d0, d1} , [r0]! @row3
+
+    vst1.16 {d0, d1} , [r0]! @row4
+
+    vst1.16 {d0, d1} , [r0]! @row5
+
+    vst1.16 {d0, d1} , [r0]! @row6
+
+    vst1.16 {d0, d1} , [r0]! @row7
+
+    vst1.16 {d0, d1} , [r0]! @row8
+
+
+
+    ldmfd r13!, {pc}
+
+
+
+
diff --git a/common/arm/impeg2_platform_macros.h b/common/arm/impeg2_platform_macros.h
new file mode 100644
index 0000000..11db302
--- /dev/null
+++ b/common/arm/impeg2_platform_macros.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef __IMPEG2_PLATFORM_MACROS_H__ +#define __IMPEG2_PLATFORM_MACROS_H__ + + +#define CONV_LE_TO_BE(u4_temp2,u4_temp1) u4_temp2 = \ + (u4_temp1 << 24) | \ + ((u4_temp1 & 0xff00) << 8) | \ + ((u4_temp1 & 0xff0000) >> 8) | \ + (u4_temp1 >> 24); + +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return (__builtin_clz(u4_word)); + else + return 32; +} +static __inline WORD32 CLIP_U8(WORD32 x) +{ + asm("usat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S8(WORD32 x) +{ + asm("ssat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U12(WORD32 x) +{ + asm("usat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S12(WORD32 x) +{ + asm("ssat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U16(WORD32 x) +{ + asm("usat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} +static __inline WORD32 CLIP_S16(WORD32 x) +{ + asm("ssat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} + +#define INLINE +#define PLD(x) __pld(x) + +#endif /* __IMPEG2_PLATFORM_MACROS_H__ */ diff --git a/common/armv8/impeg2_format_conv.s b/common/armv8/impeg2_format_conv.s new file mode 100644 index 0000000..48baf04 --- /dev/null +++ b/common/armv8/impeg2_format_conv.s @@ -0,0 +1,409 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ + +///* +////---------------------------------------------------------------------------- +//// File Name : impeg2_format_conv.s +//// +//// Description : This file has the Idct Implementations for the +//// MPEG4 SP decoder on neon platform. +//// +//// Reference Document : +//// +//// Revision History : +//// Date Author Detail Description +//// ------------ ---------------- ---------------------------------- +//// Jul 07, 2008 Naveen Kumar T Created +//// +////------------------------------------------------------------------------- +//*/ + +///* +//// ---------------------------------------------------------------------------- +//// Include Files +//// ---------------------------------------------------------------------------- +//*/ +.set log2_16 , 4 +.set log2_2 , 1 + +.text +.include "impeg2_neon_macros.s" +///* +//// ---------------------------------------------------------------------------- +//// Struct/Union Types and Define +//// ---------------------------------------------------------------------------- +//*/ + +///* +//// ---------------------------------------------------------------------------- +//// Static Global Data section variables +//// ---------------------------------------------------------------------------- +//*/ +////--------------------------- NONE -------------------------------------------- + +///* +//// ---------------------------------------------------------------------------- +//// Static Prototype Functions +//// ---------------------------------------------------------------------------- +//*/ +//// -------------------------- NONE -------------------------------------------- + +///* +//// 
----------------------------------------------------------------------------
+//// Exported functions
+//// ----------------------------------------------------------------------------
+//*/
+
+
+///*****************************************************************************
+//* *
+//* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8() *
+//* *
+//* Description : This function converts the image from YUV420P color *
+//* space to 420SP color space(UV interleaved). *
+//* Luma is copied row by row unless convert_uv_only is 1; *
+//* chroma is always interleaved as U,V byte pairs. *
+//* *
+//* Arguments : x0 pu1_y *
+//* x1 pu1_u *
+//* x2 pu1_v *
+//* x3 pu1_dest_y *
+//* x4 pu1_dest_uv *
+//* x5 u2_height *
+//* x6 u2_width *
+//* x7 u2_stridey *
+//* sp, #80 u2_strideu *
+//* sp, #88 u2_stridev *
+//* sp, #96 u2_dest_stride_y *
+//* sp, #104 u2_dest_stride_uv *
+//* sp, #112 convert_uv_only *
+//* (stack offsets are as seen after the 80 byte prologue) *
+//* *
+//* Values Returned : None *
+//* *
+//* Register Usage : x7, x8, x9, x10, x14, x16, x20, v0, v1 *
+//* *
+//* Stack Usage : 80 Bytes *
+//* *
+//* Interruptibility : Interruptible *
+//* *
+//* Known Limitations *
+//* Assumptions: Image Width: Assumed to be multiple of 16 and *
+//* greater than or equal to 16 *
+//* Image Height: Assumed to be even. *
+//* NOTE(review): x5, x6 and x7 are used as full 64-bit values *
+//* without extension - confirm the caller passes them clean. *
+//* *
+//* Revision History : *
+//* DD MM YYYY Author(s) Changes (Describe the changes made) *
+//* 07 06 2010 Varshita Draft *
+//* 07 06 2010 Naveen Kr T Completed *
+//* *
+//*****************************************************************************/
+.global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8
+impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8:
+
+    //// push the registers on the stack
+    // pu1_y, - x0
+    // pu1_u, - x1
+    // pu1_v, - x2
+    // pu1_dest_y, - x3
+    // pu1_dest_uv, - x4
+    // u2_height, - x5
+    // u2_width, - x6
+    // u2_stridey, - x7
+    // u2_strideu, - sp, #80
+    // u2_stridev, - sp, #88
+    // u2_dest_stride_y, - sp, #96
+    // u2_dest_stride_uv, - sp, #104
+    // convert_uv_only - sp, #112
+    // STMFD sp!,{x4-x12,x14}
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+
+    ldr w14, [sp, #112] //// Load convert_uv_only
+
+    cmp w14, #1
+    beq yuv420sp_uv_chroma
+    ///* Do the preprocessing before the main loops start */
+    //// Load the parameters from stack
+
+    ldr w8, [sp, #96] //// Load u2_dest_stride_y from stack
+    uxtw x8, w8
+
+    sub x7, x7, x6 //// Source increment
+
+    sub x8, x8, x6 //// Destination increment
+
+    //// Count luma rows in a scratch register: x5 (height) is still needed
+    //// to derive the chroma row count after this loop. Counting x5 itself
+    //// down to zero left the chroma loop with a single row to convert.
+    mov x10, x5
+
+yuv420sp_uv_row_loop_y:
+    mov x16, x6
+
+yuv420sp_uv_col_loop_y:
+    prfm pldl1keep, [x0, #128]
+    ld1 {v0.8b, v1.8b}, [x0], #16
+    st1 {v0.8b, v1.8b}, [x3], #16
+    sub x16, x16, #16
+    cmp x16, #15
+    bgt yuv420sp_uv_col_loop_y
+
+    cmp x16, #0
+    beq yuv420sp_uv_row_loop__y
+    ////If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
+    ////Ex if width is 162, above loop will process 160 pixels. And
+    ////Both source and destination will point to 146th pixel and then 16 bytes will be read
+    //// and written using ld1 and st1
+    sub x20, x16, #16
+    neg x16, x20 //// x16 = 16 - remaining bytes
+    sub x0, x0, x16
+    sub x3, x3, x16
+
+    ld1 {v0.8b, v1.8b}, [x0], #16
+    st1 {v0.8b, v1.8b}, [x3], #16
+
+yuv420sp_uv_row_loop__y:
+    add x0, x0, x7
+    add x3, x3, x8
+    subs x10, x10, #1
+    bgt yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+    ldr w7, [sp, #80] //// Load u2_strideu from stack
+    sxtw x7, w7
+
+    ldr w9, [sp, #88] //// Load u2_stridev from stack
+    sxtw x9, w9
+
+    ldr w8, [sp, #104] //// Load u2_dest_stride_uv from stack
+    sxtw x8, w8
+
+    sub x7, x7, x6, lsr #1 //// U source increment
+
+    sub x9, x9, x6, lsr #1 //// V source increment
+
+    sub x8, x8, x6 //// Destination increment
+
+    lsr x6, x6, #1 //// chroma width = width / 2
+    lsr x5, x5, #1 //// chroma rows = height / 2
+yuv420sp_uv_row_loop_uv:
+    mov x16, x6
+
+
+yuv420sp_uv_col_loop_uv:
+    prfm pldl1keep, [x1, #128]
+    prfm pldl1keep, [x2, #128]
+
+    ld1 {v0.8b}, [x1], #8
+    ld1 {v1.8b}, [x2], #8
+    st2 {v0.8b, v1.8b}, [x4], #16 //// interleave as U0 V0 U1 V1 ...
+
+    sub x16, x16, #8
+    cmp x16, #7
+    bgt yuv420sp_uv_col_loop_uv
+
+    cmp x16, #0
+    beq yuv420sp_uv_row_loop__uv
+    ////If the chroma width is not a multiple of 8, go back by a few bytes so that
+    ////a full 8 bytes of U and of V can be read, and rewrite the overlapping
+    ////16 interleaved bytes (same trick as the luma tail above)
+    sub x20, x16, #8
+    neg x16, x20 //// x16 = 8 - remaining chroma samples
+    sub x1, x1, x16
+    sub x2, x2, x16
+    sub x4, x4, x16, lsl #1
+
+    ld1 {v0.8b}, [x1], #8
+    ld1 {v1.8b}, [x2], #8
+    st2 {v0.8b, v1.8b}, [x4], #16
+
+yuv420sp_uv_row_loop__uv:
+    add x1, x1, x7
+    add x2, x2, x9
+    add x4, x4, x8
+    subs x5, x5, #1
+    bgt yuv420sp_uv_row_loop_uv
+    ////POP THE REGISTERS
+    // LDMFD sp!,{x4-x12,PC}
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
+
+
+///*****************************************************************************
+//* *
+//* Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8() *
+//* *
+//* Description : This function conversts the image from YUV420P color *
+//* space to 420SP color space(VU interleaved). *
+//* This function is similar to above function *
+//* IMP4D_CXA8_YUV420toYUV420SP_VU with a difference in *
+//* VLD1.8 for chroma - order of registers is different *
+//* *
+//* Arguments : x0 pu1_y *
+//* x1 pu1_u *
+//* x2 pu1_v *
+//* x3 pu1_dest_y *
+//* x4 pu1_dest_uv *
+//* x5 u2_height *
+//* x6 u2_width *
+//* x7 u2_stridey *
+//* sp, #80 u2_strideu *
+//* sp, #88 u2_stridev *
+//* sp, #96 u2_dest_stride_y *
+//* sp, #104 u2_dest_stride_uv *
+//* sp, #112 convert_uv_only *
+//* *
+//* Values Returned : None *
+//* *
+//* Register Usage : x8, x14, x16, x20, v0, v1 *
+//* *
+//* Stack Usage : 80 Bytes *
+//* *
+//* Interruptibility : Interruptible *
+//* *
+//* Known Limitations *
+//* Assumptions: Image Width: Assumed to be multiple of 16 and *
+//* greater than or equal to 16 *
+//* Image Height: Assumed to be even. 
*
+//* *
+//* Revision History : *
+//* DD MM YYYY Author(s) Changes (Describe the changes made) *
+//* 07 06 2010 Varshita Draft *
+//* 07 06 2010 Naveen Kr T Completed *
+//* *
+//*****************************************************************************/
+
+//// Copies luma (unless convert_uv_only is 1) and interleaves chroma as V,U
+//// byte pairs. Scratch registers used by this routine: x7, x8, x9, x10, x14,
+//// x16, x20, v0, v1.
+//// NOTE(review): x5, x6 and x7 are used as full 64-bit values without
+//// extension - confirm the caller passes them clean.
+.global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8
+impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8:
+
+    //// push the registers on the stack
+    // pu1_y, - x0
+    // pu1_u, - x1
+    // pu1_v, - x2
+    // pu1_dest_y, - x3
+    // pu1_dest_uv, - x4
+    // u2_height, - x5
+    // u2_width, - x6
+    // u2_stridey, - x7
+    // u2_strideu, - sp, #80
+    // u2_stridev, - sp, #88
+    // u2_dest_stride_y, - sp, #96
+    // u2_dest_stride_uv, - sp, #104
+    // convert_uv_only - sp, #112
+    // STMFD sp!,{x4-x12,x14}
+    push_v_regs
+    stp x19, x20, [sp, #-16]!
+
+    ldr w14, [sp, #112] //// Load convert_uv_only
+
+    cmp w14, #1
+    beq yuv420sp_vu_chroma
+
+    ///* Do the preprocessing before the main loops start */
+    //// Load the parameters from stack
+
+    ldr w8, [sp, #96] //// Load u2_dest_stride_y from stack
+    uxtw x8, w8
+
+    sub x7, x7, x6 //// Source increment
+
+    sub x8, x8, x6 //// Destination increment
+
+    //// Count luma rows in a scratch register: x5 (height) is still needed
+    //// to derive the chroma row count after this loop. Counting x5 itself
+    //// down to zero left the chroma loop with a single row to convert.
+    mov x10, x5
+
+yuv420sp_vu_row_loop_y:
+    mov x16, x6
+
+yuv420sp_vu_col_loop_y:
+    prfm pldl1keep, [x0, #128]
+    ld1 {v0.8b, v1.8b}, [x0], #16
+    st1 {v0.8b, v1.8b}, [x3], #16
+    sub x16, x16, #16
+    cmp x16, #15
+    bgt yuv420sp_vu_col_loop_y
+
+    cmp x16, #0
+    beq yuv420sp_vu_row_loop__y
+    ////If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
+    ////Ex if width is 162, above loop will process 160 pixels. And
+    ////Both source and destination will point to 146th pixel and then 16 bytes will be read
+    //// and written using ld1 and st1
+    sub x20, x16, #16
+    neg x16, x20 //// x16 = 16 - remaining bytes
+    sub x0, x0, x16
+    sub x3, x3, x16
+
+    ld1 {v0.8b, v1.8b}, [x0], #16
+    st1 {v0.8b, v1.8b}, [x3], #16
+
+yuv420sp_vu_row_loop__y:
+    add x0, x0, x7
+    add x3, x3, x8
+    subs x10, x10, #1
+    bgt yuv420sp_vu_row_loop_y
+
+yuv420sp_vu_chroma:
+    ldr w7, [sp, #80] //// Load u2_strideu from stack
+    sxtw x7, w7
+
+    ldr w9, [sp, #88] //// Load u2_stridev from stack
+    sxtw x9, w9
+
+    ldr w8, [sp, #104] //// Load u2_dest_stride_uv from stack
+    sxtw x8, w8
+
+    sub x7, x7, x6, lsr #1 //// U source increment
+
+    sub x9, x9, x6, lsr #1 //// V source increment
+
+    sub x8, x8, x6 //// Destination increment
+
+    lsr x6, x6, #1 //// chroma width = width / 2
+    lsr x5, x5, #1 //// chroma rows = height / 2
+yuv420sp_vu_row_loop_uv:
+    mov x16, x6
+
+
+yuv420sp_vu_col_loop_uv:
+    prfm pldl1keep, [x1, #128]
+    prfm pldl1keep, [x2, #128]
+    ld1 {v1.8b}, [x1], #8 //// U goes to the second st2 register
+    ld1 {v0.8b}, [x2], #8 //// V goes to the first st2 register
+    st2 {v0.8b, v1.8b}, [x4], #16 //// interleave as V0 U0 V1 U1 ...
+    sub x16, x16, #8
+    cmp x16, #7
+    bgt yuv420sp_vu_col_loop_uv
+
+    cmp x16, #0
+    beq yuv420sp_vu_row_loop__uv
+    ////If the chroma width is not a multiple of 8, go back by a few bytes so that
+    ////a full 8 bytes of U and of V can be read, and rewrite the overlapping
+    ////16 interleaved bytes (same trick as the luma tail above)
+    sub x20, x16, #8
+    neg x16, x20 //// x16 = 8 - remaining chroma samples
+    sub x1, x1, x16
+    sub x2, x2, x16
+    sub x4, x4, x16, lsl #1
+
+    ld1 {v1.8b}, [x1], #8
+    ld1 {v0.8b}, [x2], #8
+    st2 {v0.8b, v1.8b}, [x4], #16
+
+yuv420sp_vu_row_loop__uv:
+    add x1, x1, x7
+    add x2, x2, x9
+    add x4, x4, x8
+    subs x5, x5, #1
+    bgt yuv420sp_vu_row_loop_uv
+    ////POP THE REGISTERS
+    // LDMFD sp!,{x4-x12,PC}
+    ldp x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
diff --git a/common/armv8/impeg2_idct.s b/common/armv8/impeg2_idct.s
new file mode 100644
index 0000000..4956e54
--- /dev/null
+++ b/common/armv8/impeg2_idct.s
@@ -0,0 +1,1247 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ +///** +// ******************************************************************************* +// * @file +// * impeg2_idct.s +// * +// * @brief +// * contains function definitions for single stage inverse transform +// * +// * @author +// * anand s +// * +// * @par list of functions: +// * - impeg2_idct_recon_dc_av8() +// * +// * @remarks +// * none +// * +// ******************************************************************************* +//*/ + +///** +// ******************************************************************************* +// * +// * @brief +// * this function performs inverse transform and reconstruction for 8x8 +// * input block +// * +// * @par description: +// * performs inverse transform and adds the prediction data and clips output +// * to 8 bit +// * +// * @param[in] pi2_src +// * input 8x8 coefficients +// * +// * @param[in] pi2_tmp +// * temporary 8x8 buffer for storing inverse +// * +// * transform +// * 1st stage output +// * +// * @param[in] pu1_pred +// * prediction 8x8 block +// * +// * @param[out] pu1_dst +// * output 8x8 block +// * +// * @param[in] src_strd +// * input stride +// * +// * @param[in] pred_strd +// * prediction stride +// * +// * @param[in] dst_strd +// * output stride +// * +// * @param[in] shift +// * output shift +// * +// * @param[in] zero_cols +// * zero columns in pi2_src +// * +// * @returns void +// * +// * @remarks +// * none +// * +// ******************************************************************************* +// */ + +//void impeg2_itrans_recon_8x8(word16 *pi2_src, +// word16 *pi2_tmp, +// uword8 *pu1_pred, +// uword8 *pu1_dst, +// word32 src_strd, +// word32 pred_strd, +// word32 dst_strd, +// word32 zero_cols +// word32 zero_rows ) + +//**************variables vs registers************************* +// x0 => *pi2_src +// x1 => *pi2_tmp +// x2 => *pu1_pred +// x3 => *pu1_dst +// src_strd +// pred_strd +// dst_strd +// zero_cols + + + +.text +.align 4 +.include "impeg2_neon_macros.s" + 
+.set idct_stg1_shift , 12
+.set idct_stg2_shift , 16
+.set idct_stg1_round , (1 << (idct_stg1_shift - 1))
+.set idct_stg2_round , (1 << (idct_stg2_shift - 1))
+
+.extern gai2_impeg2_idct_q15
+.extern gai2_impeg2_idct_q11
+.extern gai2_impeg2_idct_first_col_q15
+.extern gai2_impeg2_idct_first_col_q11
+.extern gai2_impeg2_mismatch_stg2_additive
+
+//// DC-only inverse transform + reconstruction of one 8x8 block.
+//// The single coefficient pi2_src[0] is scaled in two rounded stages
+//// (first entry of gai2_impeg2_idct_q15, >> idct_stg1_shift, then first entry
+//// of gai2_impeg2_idct_q11, >> idct_stg2_shift), broadcast to eight 16-bit
+//// lanes, added to each of the 8 prediction rows and narrowed to bytes with
+//// unsigned saturation before being stored to pu1_dst.
+.global impeg2_idct_recon_dc_av8
+impeg2_idct_recon_dc_av8:
+    // STMFD sp!,{x4,x6,x12,x14}
+    push_v_regs
+    ////x0: pi2_src
+    ////x1: pi2_tmp - not used
+    ////x2: pu1_pred
+    ////x3: pu1_dst
+    ////x4: used as scratch
+    ////x5: pred_strd
+    ////x6: dst_strd
+
+    //// The strides are used below as 64-bit post-index registers. They are
+    //// presumably 32-bit arguments (the prototype comment in this file lists
+    //// them as word32), and AAPCS64 leaves bits 63:32 of such an argument
+    //// unspecified, so widen them explicitly before use.
+    sxtw x5, w5
+    sxtw x6, w6
+
+    ldrsh x4, [x0]
+    adrp x14, :got:gai2_impeg2_idct_q15
+    ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_q15]
+    ldrsh x12, [x14]
+
+    //// Prediction row loads are interleaved with the scalar DC computation.
+    ld1 {v0.8b}, [x2], x5
+    mul x4, x4, x12
+
+    ld1 {v1.8b}, [x2], x5
+    add x4, x4, #idct_stg1_round
+
+    ld1 {v2.8b}, [x2], x5
+    asr x4, x4, #idct_stg1_shift
+
+    adrp x14, :got:gai2_impeg2_idct_q11
+    ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_q11]
+    ldrsh x12, [x14]
+
+    ld1 {v3.8b}, [x2], x5
+    mul x4, x4, x12
+
+    ld1 {v4.8b}, [x2], x5
+    add x4, x4, #idct_stg2_round
+
+    ld1 {v5.8b}, [x2], x5
+    asr x4, x4, #idct_stg2_shift
+
+    ld1 {v6.8b}, [x2], x5
+    dup v30.8h, w4 //// broadcast the DC value to 8 x 16-bit lanes
+
+
+    ld1 {v7.8b}, [x2], x5
+
+    //// Per row: widen pred and add DC (uaddw), saturate to 0..255 (sqxtun),
+    //// store. The three steps of neighbouring rows are software-pipelined.
+    uaddw v8.8h, v30.8h , v0.8b
+
+    uaddw v10.8h, v30.8h , v1.8b
+    sqxtun v0.8b, v8.8h
+
+    uaddw v12.8h, v30.8h , v2.8b
+    sqxtun v1.8b, v10.8h
+    st1 {v0.8b}, [x3], x6
+
+    uaddw v14.8h, v30.8h , v3.8b
+    sqxtun v2.8b, v12.8h
+    st1 {v1.8b}, [x3], x6
+
+    uaddw v16.8h, v30.8h , v4.8b
+    sqxtun v3.8b, v14.8h
+    st1 {v2.8b}, [x3], x6
+
+    uaddw v18.8h, v30.8h , v5.8b
+    sqxtun v4.8b, v16.8h
+    st1 {v3.8b}, [x3], x6
+
+    uaddw v20.8h, v30.8h , v6.8b
+    sqxtun v5.8b, v18.8h
+    st1 {v4.8b}, [x3], x6
+
+    uaddw v22.8h, v30.8h , v7.8b
+    sqxtun v6.8b, v20.8h
+    st1 {v5.8b}, [x3], x6
+
+    sqxtun v7.8b, v22.8h
+    st1 {v6.8b}, [x3], x6
+
+
+    st1 {v7.8b}, [x3], x6
+
+    // LDMFD sp!,{x4,x6,x12,pc}
+    pop_v_regs
+    ret
+
+
+
+.global impeg2_idct_recon_dc_mismatch_av8
+.extern 
gai2_impeg2_idct_last_row_q11 +.extern gai2_impeg2_mismatch_stg1_outp +impeg2_idct_recon_dc_mismatch_av8: + // STMFD sp!,{x4-x12,x14} + push_v_regs + + ldrsh x4, [x0] + adrp x14, :got:gai2_impeg2_idct_q15 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_q15] + ldrsh x12, [x14] + + mul x4, x4, x12 + add x4, x4, #idct_stg1_round + asr x4, x4, #idct_stg1_shift + + adrp x14, :got:gai2_impeg2_idct_q11 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_q11] + ldrsh x12, [x14] + mul x4, x4, x12 + dup v0.4s, w4 + + mov x14, #16 ////Increment for table read + adrp x4, :got:gai2_impeg2_mismatch_stg2_additive + ldr x4, [x4, #:got_lo12:gai2_impeg2_mismatch_stg2_additive] + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, 
v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + ld1 {v2.4h, v3.4h}, [x4], x14 + ld1 {v30.8b}, [x2], x5 + sxtl v8.4s, v2.4h + sxtl v10.4s, v3.4h + raddhn v12.4h, v0.4s, v8.4s + raddhn2 v12.8h, v0.4s, v10.4s + uaddw v14.8h, v12.8h , v30.8b + sqxtun v30.8b, v14.8h + st1 {v30.8b}, [x3], x6 + + + // LDMFD sp!,{x4-x12,pc} + pop_v_regs + ret + +.globl impeg2_idct_recon_av8 + +.type impeg2_idct_recon_av8, %function + +impeg2_idct_recon_av8: +////register usage.extern - loading and until idct of columns +//// cosine constants - d0 +//// sine constants - d1 +//// row 0 first half - d2 - y0 +//// row 1 first half - d6 - y1 +//// row 2 first half - d3 - y2 +//// row 3 first half - d7 - y3 +//// row 4 first half - d10 - y4 +//// row 5 first half - d14 - y5 +//// row 6 first half - d11 - y6 +//// row 7 first half - d15 - y7 + +//// row 0 second half - d4 - y0 +//// row 1 second half - d8 - y1 +//// row 2 second half - d5 - y2 +//// row 3 second half - d9 - y3 +//// row 4 second half - d12 - y4 +//// row 5 second half - d16 - y5 +//// row 6 second half - d13 - y6 +//// row 7 second half - d17 - y7 + + //// copy the input pointer to another register + //// step 1 : load all constants + // stmfd sp!,{x4-x12,x14} + + ldr w11, [sp] // zero rows + + push_v_regs + stp x19, x20, [sp, #-16]! 
+ + mov x12, x7 // zero columns + mov x8, x5 // prediction stride + mov x7, x6 // destination stride + mov x6, x4 // src stride + lsl x6, x6, #1 // x sizeof(word16) + add x9, x0, x6, lsl #1 // 2 rows + + add x10, x6, x6, lsl #1 // 3 rows + + sub x10, x10, #8 // - 4 cols * sizeof(word16) + sub x5, x6, #8 // src_strd - 4 cols * sizeof(word16) + + adrp x14, :got:gai2_impeg2_idct_first_col_q15 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15] + ld1 {v0.4h, v1.4h}, [x14] ////d0,d1 are used for storing the constant data + + ////step 2 load all the input data + ////step 3 operate first 4 colums at a time + + and x11, x11, #0xff + and x12, x12, #0xff + + cmp x11, #0xf0 + bge skip_last4_rows + + + ld1 {v2.4h}, [x0], #8 + ld1 {v3.4h}, [x9], #8 + ld1 {v4.4h}, [x0], x5 + smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + ld1 {v5.4h}, [x9], x5 + smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1) + ld1 {v6.4h}, [x0], #8 + ld1 {v7.4h}, [x9], #8 + smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) + ld1 {v8.4h}, [x0], x10 + smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1) + ld1 {v9.4h}, [x9], x10 + smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2) + ld1 {v10.4h}, [x0], #8 + smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3) + ld1 {v11.4h}, [x9], #8 + smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + ld1 {v12.4h}, [x0], x5 + smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + ld1 {v13.4h}, [x9], x5 + smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + ld1 {v14.4h}, [x0], #8 + smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + ld1 {v15.4h}, [x9], #8 + smull v22.4s, v10.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1) + ld1 {v16.4h}, [x0], x10 + smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0) + ld1 {v17.4h}, [x9], x10 + + ///* this following was activated when alignment is not there */ +//// vld1.16 d2,[x0]! 
+//// vld1.16 d3,[x2]! +//// vld1.16 d4,[x0]! +//// vld1.16 d5,[x2]! +//// vld1.16 d6,[x0]! +//// vld1.16 d7,[x2]! +//// vld1.16 d8,[x0],x3 +//// vld1.16 d9,[x2],x3 +//// vld1.16 d10,[x0]! +//// vld1.16 d11,[x2]! +//// vld1.16 d12,[x0]! +//// vld1.16 d13,[x2]! +//// vld1.16 d14,[x0]! +//// vld1.16 d15,[x2]! +//// vld1.16 d16,[x0],x3 +//// vld1.16 d17,[x2],x3 + + + + + smlal v24.4s, v14.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + smlsl v26.4s, v14.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + smlal v28.4s, v14.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + smlal v30.4s, v14.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + smlsl v18.4s, v11.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + smlal v6.4s, v11.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + add v10.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + smlal v24.4s, v15.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7) + smlsl v26.4s, v15.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6) + smlal v28.4s, v15.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5) + smlsl v30.4s, v15.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4) + + add v14.4s, v10.4s , v6.4s //// a0 = c0 + d0(part of x0,x7) + sub v10.4s, v10.4s , v6.4s //// a3 = c0 - d0(part of x3,x4) + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0) + sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7) + + add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2) + sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5) + + add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1) + sub 
v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6) + + add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3) + sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4) + + sqrshrn v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + + + b last4_cols + + + +skip_last4_rows: + adrp x14, :got:gai2_impeg2_idct_first_col_q15 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15] + ld1 {v0.4h, v1.4h}, [x14] + + ld1 {v2.4h}, [x0], #8 + ld1 {v3.4h}, [x9], #8 + ld1 {v4.4h}, [x0], x5 + ld1 {v5.4h}, [x9], x5 + ld1 {v6.4h}, [x0], #8 + ld1 {v7.4h}, [x9], #8 + ld1 {v8.4h}, [x0], x10 + ld1 {v9.4h}, [x9], x10 + + + + movi v12.4h, #0 + movi v13.4h, #0 + movi v16.4h, #0 + movi v17.4h, #0 + + + + + smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) + smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3) + + smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + + smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1) + smull v6.4s, v3.4h, v0.4h[2] //// y2 * 
cos2(part of d0) + + smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + + + add v14.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7) + sub v10.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4) + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0) + sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7) + + add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2) + sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5) + + add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1) + sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6) + + add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3) + sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4) + + sqrshrn v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + + +last4_cols: + adrp x14, :got:gai2_impeg2_idct_first_col_q15 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15] + ld1 {v0.4h, v1.4h}, [x14] + + + cmp x12, #0xf0 + bge skip_last4cols + + smull v24.4s, v8.4h, v0.4h[1] //// y1 * cos1(part of b0) + smull v26.4s, v8.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v8.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v8.4h, v1.4h[3] //// y1 * sin1(part of b3) + + smlal v24.4s, v9.4h, v0.4h[3] //// y1 * cos1 + y3 * 
cos3(part of b0) + smlsl v26.4s, v9.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v9.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v9.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + + smull v18.4s, v5.4h, v1.4h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1) + smull v8.4s, v5.4h, v0.4h[2] //// y2 * cos2(part of d0) + + smull v20.4s, v4.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1) + + smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + smlal v8.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7) + smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6) + smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5) + smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4) + + add v16.4s, v12.4s , v8.4s //// a0 = c0 + d0(part of e0,e7) + sub v12.4s, v12.4s , v8.4s //// a3 = c0 - d0(part of e3,e4) + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of e2,e5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of e1,e6) + + add v20.4s, v16.4s , v24.4s //// a0 + b0(part of e0) + sub v8.4s, v16.4s , v24.4s //// a0 - b0(part of e7) + + add v24.4s, v22.4s , v28.4s //// 
a2 + b2(part of e2) + sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of e5) + + add v28.4s, v18.4s , v26.4s //// a1 + b1(part of e1) + sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of e6) + + add v26.4s, v12.4s , v30.4s //// a3 + b3(part of e3) + sub v30.4s, v12.4s , v30.4s //// a3 - b3(part of x4) + + sqrshrn v4.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v17.4h, v8.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v5.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v16.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v8.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v13.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v9.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT) + sqrshrn v12.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT) + b end_skip_last4cols + + + +skip_last4cols: + adrp x14, :got:gai2_impeg2_idct_first_col_q11 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q11] + ld1 {v0.4h, v1.4h}, [x14] + + umov x15, v25.d[0] + + trn1 v25.4h, v2.4h, v6.4h + trn2 v29.4h, v2.4h, v6.4h ////[x3,x1],[x2,x0] first qudrant transposing + + trn1 v27.4h, v3.4h, v7.4h + trn2 v31.4h, v3.4h, v7.4h ////[x3,x1],[x2,x0] first qudrant transposing + + trn1 v6.2s, v29.2s, v31.2s + trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first qudrant transposing continued..... + trn1 v2.2s, v25.2s, v27.2s + trn2 v3.2s, v25.2s, v27.2s ////x0,x1,x2,x3 first qudrant transposing continued..... 
+ + + trn1 v25.4h, v10.4h, v14.4h + trn2 v29.4h, v10.4h, v14.4h ////[x7,x5],[x6,x4] third qudrant transposing + + trn1 v27.4h, v11.4h, v15.4h + trn2 v31.4h, v11.4h, v15.4h ////[x7,x5],[x6,x4] third qudrant transposing + + trn1 v10.2s, v25.2s, v27.2s + trn2 v11.2s, v25.2s, v27.2s ////x4,x5,x6,x7 third qudrant transposing continued..... + trn1 v14.2s, v29.2s, v31.2s + trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third qudrant transposing continued..... + + mov v25.d[0], x15 + + smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) + smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3) + + smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + + smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) +// vmull.s16 q11,d4,d0[0] @// y4 * cos4(part of c0 and c1) + + smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1) + smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0) + + + + + sub v22.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4) + add v4.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7) + + + add v2.4s, v4.4s , v24.4s + + sub v6.4s, v4.4s , v24.4s + + add v8.4s, v22.4s , v30.4s + + sub v24.4s, v22.4s , v30.4s + + sqrshrn v5.4h, v8.4s, #idct_stg2_shift + sqrshrn v2.4h, v2.4s, #idct_stg2_shift + sqrshrn v9.4h, v6.4s, #idct_stg2_shift + sqrshrn v6.4h, v24.4s, #idct_stg2_shift + + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + + add v30.4s, v22.4s , v28.4s + + sub v24.4s, v22.4s , v28.4s + + add v28.4s, v18.4s , v26.4s + + sub v22.4s, v18.4s , v26.4s + sqrshrn v4.4h, v30.4s, #idct_stg2_shift + 
sqrshrn v7.4h, v24.4s, #idct_stg2_shift + sqrshrn v3.4h, v28.4s, #idct_stg2_shift + sqrshrn v8.4h, v22.4s, #idct_stg2_shift + + + + umov x19, v25.d[0] + umov x20, v25.d[1] + + trn1 v27.4h, v2.4h, v3.4h + trn2 v29.4h, v2.4h, v3.4h + trn1 v25.4h, v4.4h, v5.4h + trn2 v31.4h, v4.4h, v5.4h + + trn1 v2.2s, v27.2s, v25.2s + trn2 v4.2s, v27.2s, v25.2s + trn1 v3.2s, v29.2s, v31.2s + trn2 v5.2s, v29.2s, v31.2s + + trn1 v27.4h, v6.4h, v7.4h + trn2 v29.4h, v6.4h, v7.4h + trn1 v25.4h, v8.4h, v9.4h + trn2 v31.4h, v8.4h, v9.4h + + trn1 v6.2s, v27.2s, v25.2s + trn2 v8.2s, v27.2s, v25.2s + trn1 v7.2s, v29.2s, v31.2s + trn2 v9.2s, v29.2s, v31.2s + + mov v25.d[0], x19 + mov v25.d[1], x20 + + smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0) + + smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3) + + smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1) + smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0) + + + add x4, x2, x8, lsl #1 // x4 = x2 + pred_strd * 2 => x4 points to 3rd row of pred data + + + add x5, x8, x8, lsl #1 // + + + add x0, x3, x7, lsl #1 // x0 points to 3rd row of dest data + + + add x10, x7, x7, lsl #1 // + + // swapping v3 and v6 + mov v31.d[0], v3.d[0] + mov v3.d[0], v6.d[0] + mov v6.d[0], v31.d[0] + + // swapping v5 and v8 + mov v31.d[0], v5.d[0] + mov v5.d[0], v8.d[0] + mov v8.d[0], v31.d[0] + + + sub v22.4s, v20.4s , v14.4s //// a3 = c0 - d0(part of x3,x4) + add v12.4s, v20.4s , v14.4s //// a0 = c0 + d0(part of x0,x7) + + + add v0.4s, v12.4s , 
v24.4s + + + sub v24.4s, v12.4s , v24.4s + + + add v12.4s, v22.4s , v30.4s + + + sub v14.4s, v22.4s , v30.4s + + sqrshrn v10.4h, v0.4s, #idct_stg2_shift + sqrshrn v17.4h, v24.4s, #idct_stg2_shift + sqrshrn v13.4h, v12.4s, #idct_stg2_shift + sqrshrn v14.4h, v14.4s, #idct_stg2_shift + + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + + add v0.4s, v22.4s , v28.4s + + + sub v24.4s, v22.4s , v28.4s + + + add v28.4s, v18.4s , v26.4s + + + sub v26.4s, v18.4s , v26.4s + ld1 {v18.8b}, [x2], x8 + + sqrshrn v12.4h, v0.4s, #idct_stg2_shift + ld1 {v20.8b}, [x2], x5 + + + sqrshrn v15.4h, v24.4s, #idct_stg2_shift + ld1 {v19.8b}, [x2], x8 + + + + + sqrshrn v11.4h, v28.4s, #idct_stg2_shift + ld1 {v22.8b}, [x4], x8 + + + + + sqrshrn v16.4h, v26.4s, #idct_stg2_shift + ld1 {v21.8b}, [x2], x5 + + + b pred_buff_addition +end_skip_last4cols: + adrp x14, :got:gai2_impeg2_idct_first_col_q11 + ldr x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q11] + ld1 {v0.4h, v1.4h}, [x14] + + + umov x19, v25.d[0] + umov x20, v25.d[1] + +///* now the idct of columns is done, transpose so that row idct done efficiently(step5) */ + trn1 v27.4h, v2.4h, v6.4h + trn2 v29.4h, v2.4h, v6.4h ////[x3,x1],[x2,x0] first qudrant transposing + trn1 v25.4h, v3.4h, v7.4h + trn2 v31.4h, v3.4h, v7.4h ////[x3,x1],[x2,x0] first qudrant transposing + + trn1 v2.2s, v27.2s, v25.2s + trn2 v3.2s, v27.2s, v25.2s ////x0,x1,x2,x3 first qudrant transposing continued..... + trn1 v6.2s, v29.2s, v31.2s + trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first qudrant transposing continued..... + + trn1 v27.4h, v4.4h, v8.4h + trn2 v29.4h, v4.4h, v8.4h ////[x3,x1],[x2,x0] second qudrant transposing + trn1 v25.4h, v5.4h, v9.4h + trn2 v31.4h, v5.4h, v9.4h ////[x3,x1],[x2,x0] second qudrant transposing + + trn1 v4.2s, v27.2s, v25.2s + trn2 v5.2s, v27.2s, v25.2s ////x0,x1,x2,x3 second qudrant transposing continued..... 
+ trn1 v8.2s, v29.2s, v31.2s + trn2 v9.2s, v29.2s, v31.2s ////x0,x1,x2,x3 second qudrant transposing continued..... + + trn1 v27.4h, v10.4h, v14.4h + trn2 v29.4h, v10.4h, v14.4h ////[x7,x5],[x6,x4] third qudrant transposing + trn1 v25.4h, v11.4h, v15.4h + trn2 v31.4h, v11.4h, v15.4h ////[x7,x5],[x6,x4] third qudrant transposing + + trn1 v10.2s, v27.2s, v25.2s + trn2 v11.2s, v27.2s, v25.2s ////x4,x5,x6,x7 third qudrant transposing continued..... + trn1 v14.2s, v29.2s, v31.2s + trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third qudrant transposing continued..... + + trn1 v27.4h, v12.4h, v16.4h + trn2 v29.4h, v12.4h, v16.4h ////[x7,x5],[x6,x4] fourth qudrant transposing + trn1 v25.4h, v13.4h, v17.4h + trn2 v31.4h, v13.4h, v17.4h ////[x7,x5],[x6,x4] fourth qudrant transposing + + trn1 v12.2s, v27.2s, v25.2s + trn2 v13.2s, v27.2s, v25.2s ////x4,x5,x6,x7 fourth qudrant transposing continued..... + trn1 v16.2s, v29.2s, v31.2s + trn2 v17.2s, v29.2s, v31.2s ////x4,x5,x6,x7 fourth qudrant transposing continued..... 
+ + mov v25.d[0], x19 + mov v25.d[1], x20 + + ////step6 operate on first four rows and find their idct + ////register usage.extern - storing and idct of rows +//// cosine constants - d0 +//// sine constants - d1 +//// element 0 first four - d2 - y0 +//// element 1 first four - d6 - y1 +//// element 2 first four - d3 - y2 +//// element 3 first four - d7 - y3 +//// element 4 first four - d4 - y4 +//// element 5 first four - d8 - y5 +//// element 6 first four - d5 - y6 +//// element 7 first four - d9 - y7 +//// element 0 second four - d10 - y0 +//// element 1 second four - d14 - y1 +//// element 2 second four - d11 - y2 +//// element 3 second four - d15 - y3 +//// element 4 second four - d12 - y4 +//// element 5 second four - d16 - y5 +//// element 6 second four - d13 - y6 +//// element 7 second four - d17 - y7 + + //// map between first kernel code seq and current +//// d2 -> d2 +//// d6 -> d6 +//// d3 -> d3 +//// d7 -> d7 +//// d10 -> d4 +//// d14 -> d8 +//// d11 -> d5 +//// d15 -> d9 +//// q3 -> q3 +//// q5 -> q2 +//// q7 -> q4 + + smull v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0) + smull v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3) + + smlal v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + smlsl v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + + smull v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + smull v22.4s, v4.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1) + + smull v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1) + smull v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0) + + + smlal v24.4s, v8.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + smlsl v26.4s, v8.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 
* cos1(part of b1) + smlal v28.4s, v8.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + smlal v30.4s, v8.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + smlsl v18.4s, v5.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + smlal v6.4s, v5.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + add v2.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1) + + smlal v24.4s, v9.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7) + smlsl v26.4s, v9.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6) + smlal v28.4s, v9.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5) + smlsl v30.4s, v9.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4) + + sub v22.4s, v2.4s , v6.4s //// a3 = c0 - d0(part of x3,x4) + add v4.4s, v2.4s , v6.4s //// a0 = c0 + d0(part of x0,x7) + + + add v2.4s, v4.4s , v24.4s + + sub v6.4s, v4.4s , v24.4s + + add v8.4s, v22.4s , v30.4s + + sub v24.4s, v22.4s , v30.4s + + sqrshrn v5.4h, v8.4s, #idct_stg2_shift + sqrshrn v2.4h, v2.4s, #idct_stg2_shift + sqrshrn v9.4h, v6.4s, #idct_stg2_shift + sqrshrn v6.4h, v24.4s, #idct_stg2_shift + + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + + add v30.4s, v22.4s , v28.4s + + sub v24.4s, v22.4s , v28.4s + + add v28.4s, v18.4s , v26.4s + + sub v22.4s, v18.4s , v26.4s + sqrshrn v4.4h, v30.4s, #idct_stg2_shift + sqrshrn v7.4h, v24.4s, #idct_stg2_shift + sqrshrn v3.4h, v28.4s, #idct_stg2_shift + sqrshrn v8.4h, v22.4s, #idct_stg2_shift + + + + umov x19, v25.d[0] + umov x20, v25.d[1] + + trn1 v27.4h, v2.4h, v3.4h + trn2 v29.4h, v2.4h, v3.4h + trn1 v25.4h, v4.4h, v5.4h + trn2 v31.4h, v4.4h, v5.4h + + trn1 v2.2s, v27.2s, v25.2s + trn2 v4.2s, v27.2s, v25.2s + trn1 
v3.2s, v29.2s, v31.2s + trn2 v5.2s, v29.2s, v31.2s + + trn1 v27.4h, v6.4h, v7.4h + trn2 v29.4h, v6.4h, v7.4h + trn1 v25.4h, v8.4h, v9.4h + trn2 v31.4h, v8.4h, v9.4h + + trn1 v6.2s, v27.2s, v25.2s + trn2 v8.2s, v27.2s, v25.2s + trn1 v7.2s, v29.2s, v31.2s + trn2 v9.2s, v29.2s, v31.2s + + mov v25.d[0], x19 + mov v25.d[1], x20 + + + + smull v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0) + smull v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1) + smull v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2) + smull v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3) + smlal v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0) + smlsl v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1) + smlsl v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2) + smlsl v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3) + smull v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1) + smull v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1) + smull v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1) + smull v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0) + smlal v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0) + + add x4, x2, x8, lsl #1 // x4 = x2 + pred_strd * 2 => x4 points to 3rd row of pred data + smlsl v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1) + + add x5, x8, x8, lsl #1 // + smlal v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2) + + add x0, x3, x7, lsl #1 // x0 points to 3rd row of dest data + smlal v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3) + + add x10, x7, x7, lsl #1 // + smlsl v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1) + + + smlal v14.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1) + + add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1) + sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - 
y4 * cos4(part of a0 and a1) + + smlal v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7) + + // swapping v3 and v6 + mov v31.d[0], v3.d[0] + mov v3.d[0], v6.d[0] + mov v6.d[0], v31.d[0] + + smlsl v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6) + // swapping v5 and v8 + mov v31.d[0], v5.d[0] + mov v5.d[0], v8.d[0] + mov v8.d[0], v31.d[0] + + smlal v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5) + smlsl v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4) + + sub v22.4s, v12.4s , v14.4s //// a3 = c0 - d0(part of x3,x4) + add v12.4s, v12.4s , v14.4s //// a0 = c0 + d0(part of x0,x7) + + + add v0.4s, v12.4s , v24.4s + + + sub v24.4s, v12.4s , v24.4s + + + add v12.4s, v22.4s , v30.4s + + + sub v14.4s, v22.4s , v30.4s + + sqrshrn v10.4h, v0.4s, #idct_stg2_shift + sqrshrn v17.4h, v24.4s, #idct_stg2_shift + sqrshrn v13.4h, v12.4s, #idct_stg2_shift + sqrshrn v14.4h, v14.4s, #idct_stg2_shift + + sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5) + add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6) + + + add v0.4s, v22.4s , v28.4s + + + sub v24.4s, v22.4s , v28.4s + + + add v28.4s, v18.4s , v26.4s + + + sub v26.4s, v18.4s , v26.4s + ld1 {v18.8b}, [x2], x8 + + sqrshrn v12.4h, v0.4s, #idct_stg2_shift + ld1 {v20.8b}, [x2], x5 + + + sqrshrn v15.4h, v24.4s, #idct_stg2_shift + ld1 {v19.8b}, [x2], x8 + + + + + sqrshrn v11.4h, v28.4s, #idct_stg2_shift + ld1 {v22.8b}, [x4], x8 + + + + + sqrshrn v16.4h, v26.4s, #idct_stg2_shift + ld1 {v21.8b}, [x2], x5 + + + + +pred_buff_addition: + + umov x19, v25.d[0] + umov x20, v25.d[1] + + trn1 v27.4h, v10.4h, v11.4h + trn2 v29.4h, v10.4h, v11.4h + trn1 v25.4h, v12.4h, v13.4h + trn2 v31.4h, v12.4h, v13.4h + + trn1 v10.2s, v27.2s, v25.2s + trn2 v12.2s, v27.2s, v25.2s + trn1 v11.2s, v29.2s, v31.2s + trn2 v13.2s, v29.2s, v31.2s + + trn1 v27.4h, 
v14.4h, v15.4h + trn2 v29.4h, v14.4h, v15.4h + trn1 v25.4h, v16.4h, v17.4h + trn2 v31.4h, v16.4h, v17.4h + + trn1 v14.2s, v27.2s, v25.2s + trn2 v16.2s, v27.2s, v25.2s + trn1 v15.2s, v29.2s, v31.2s + trn2 v17.2s, v29.2s, v31.2s + + + mov v25.d[0], x19 + mov v25.d[1], x20 + + + ld1 {v24.8b}, [x4], x5 + ld1 {v23.8b}, [x4], x8 + ld1 {v25.8b}, [x4], x5 + mov v2.d[1], v3.d[0] + mov v4.d[1], v5.d[0] + mov v6.d[1], v7.d[0] + mov v8.d[1], v9.d[0] + uaddw v2.8h, v2.8h , v18.8b + uaddw v4.8h, v4.8h , v22.8b + uaddw v6.8h, v6.8h , v20.8b + uaddw v8.8h, v8.8h , v24.8b + + // swapping v11 and v14 + mov v31.d[0], v11.d[0] + mov v11.d[0], v14.d[0] + mov v14.d[0], v31.d[0] + + // swapping v13 and v16 + mov v31.d[0], v13.d[0] + mov v13.d[0], v16.d[0] + mov v16.d[0], v31.d[0] +// row values stored in the q register. + +//q1 :x0 +//q3: x1 +//q2: x2 +//q4: x3 +//q5: x4 +//q7: x5 +//q6: x6 +//q8: x7 + + + +///// adding the prediction buffer + + + + + + + + + + // load prediction data + + + + + + //adding recon with prediction + + + + + mov v10.d[1], v11.d[0] + mov v12.d[1], v13.d[0] + mov v14.d[1], v15.d[0] + mov v16.d[1], v17.d[0] + uaddw v10.8h, v10.8h , v19.8b + sqxtun v2.8b, v2.8h + uaddw v14.8h, v14.8h , v21.8b + sqxtun v4.8b, v4.8h + uaddw v12.8h, v12.8h , v23.8b + sqxtun v6.8b, v6.8h + uaddw v16.8h, v16.8h , v25.8b + sqxtun v8.8b, v8.8h + + + + + + + + st1 {v2.8b}, [x3], x7 + sqxtun v10.8b, v10.8h + st1 {v6.8b}, [x3], x10 + sqxtun v14.8b, v14.8h + st1 {v4.8b}, [x0], x7 + sqxtun v12.8b, v12.8h + st1 {v8.8b}, [x0], x10 + sqxtun v16.8b, v16.8h + + + + + + + + st1 {v10.8b}, [x3], x7 + st1 {v14.8b}, [x3], x10 + st1 {v12.8b}, [x0], x7 + st1 {v16.8b}, [x0], x10 + + + + + // ldmfd sp!,{x4-x12,pc} + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + diff --git a/common/armv8/impeg2_inter_pred.s b/common/armv8/impeg2_inter_pred.s new file mode 100644 index 0000000..98ade45 --- /dev/null +++ b/common/armv8/impeg2_inter_pred.s @@ -0,0 +1,814 @@ 
+//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ + +///* +////---------------------------------------------------------------------------- +//// File Name : impeg2_inter_pred.s +//// +//// Description : This file has motion compensation related +//// interpolation functions on Neon + CortexA-8 platform +//// +//// Reference Document : +//// +//// Revision History : +//// Date Author Detail Description +//// ------------ ---------------- ---------------------------------- +//// 18 jun 2010 S Hamsalekha Created +//// +////------------------------------------------------------------------------- +//*/ + +///* +//// ---------------------------------------------------------------------------- +//// Include Files +//// ---------------------------------------------------------------------------- +//*/ +// PRESERVE8 +.text +.include "impeg2_neon_macros.s" + +///* +//// ---------------------------------------------------------------------------- +//// Struct/Union Types and Define +//// ---------------------------------------------------------------------------- +//*/ + + +///* +//// 
---------------------------------------------------------------------------- +//// Static Global Data section variables +//// ---------------------------------------------------------------------------- +//*/ +//// -------------------------- NONE -------------------------------------------- + + +///* +//// ---------------------------------------------------------------------------- +//// Static Prototype Functions +//// ---------------------------------------------------------------------------- +//*/ +//// -------------------------- NONE -------------------------------------------- + +///* +//// ---------------------------------------------------------------------------- +//// Exported functions +//// ---------------------------------------------------------------------------- +//*/ + + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_copy_mb_av8() +//// +//// Detail Description : Copies one MB from src to dst: 16 y rows of 16 bytes, then 8 u rows and 8 v rows of 8 bytes each +//// +//// Inputs : x0 - pointer to src; the y, u and v plane pointers are loaded from byte offsets 0, 8 and 16 +//// x1 - pointer to dst; the y, u and v plane pointers are loaded from byte offsets 0, 8 and 16 +//// x2 - source width: byte offset added to the src pointer after each y row, halved for u and v rows +//// x3 - destination width: byte offset added to the dst pointer after each y row, halved for u and v rows +//// Registers Used : x2, x3 (both halved in place), x4, x5, v0, v1 +//// +//// Stack Usage : 64 bytes (set by push_v_regs, which is defined outside this file - TODO confirm) +//// +//// Outputs : 16x16 bytes written through dst y, 8x8 bytes each through dst u and dst v +//// +//// Return Data : None +//// +//// Programming Note : x2 and x3 are used as full 64-bit post-index offsets - NOTE(review): confirm callers pass them already extended to 64 bits +////----------------------------------------------------------------------------- +//*/ + + + +.global impeg2_copy_mb_av8 + + +impeg2_copy_mb_av8: + +//STMFD x13!,{x4,x5,x12,x14} + push_v_regs + + + ldr x4, [x0] //src->y + ldr x5, [x1] //dst->y + + //Copy the first 16-byte y row from src to dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + + ////Repeat 15 more times for y, 16 rows in total + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and 
increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b, v1.8b}, [x4], x2 //Load and increment src + st1 {v0.8b, v1.8b}, [x5], x3 //Store and increment dst + + lsr x2, x2, #1 //src_offset /= 2, u and v rows advance by half the y offset + lsr x3, x3, #1 //dst_offset /= 2, u and v rows advance by half the y offset + + ldr x4, [x0, #8] //src->u + ldr x5, [x1, #8] //dst->u + + //Copy the first 8-byte u row from src to dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + + ////Repeat 7 more times for u, 8 rows in total + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 
//Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + + ldr x4, [x0, #16] //src->v + ldr x5, [x1, #16] //dst->v + + //Copy the first 8-byte v row from src to dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + + ////Repeat 7 more times for v, 8 rows in total + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + ld1 {v0.8b}, [x4], x2 //Load and increment src + st1 {v0.8b}, [x5], x3 //Store and increment dst + +//LDMFD x13!,{x4,x5,x12,PC} + pop_v_regs + ret + + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_mc_fullx_halfy_8x8_av8() +//// +//// Detail Description : This function pastes the reference block in the +//// current frame buffer.This function is called for +//// blocks that are not coded and have motion vectors +//// with a half pel resolution. 
+//// +//// Inputs : x0 - out : Current Block Pointer +//// x1 - ref : Refernce Block Pointer +//// x2 - ref_wid : Refernce Block Width +//// x3 - out_wid @ Current Block Width +//// +//// Registers Used : x14, D0-D9 +//// +//// Stack Usage : 64 bytes +//// +//// Outputs : The Motion Compensated Block +//// +//// Return Data : None +//// +//// Programming Note : <program limitation> +////----------------------------------------------------------------------------- +//*/ + +.global impeg2_mc_fullx_halfy_8x8_av8 + +impeg2_mc_fullx_halfy_8x8_av8: + +//STMFD x13!,{x12,x14} + push_v_regs + add x14, x1, x2 + lsl x2, x2, #1 + +///* Load 8 + 1 rows from reference block */ +///* Do the addition with out rounding off as rounding value is 1 */ + ld1 {v0.8b}, [x1], x2 //// first row hence x1 = D0 + ld1 {v2.8b}, [x14], x2 //// second row hence x2 = D2 + ld1 {v4.8b}, [x1], x2 //// third row hence x3 = D4 + ld1 {v6.8b}, [x14], x2 //// fourth row hence x4 = D6 + ld1 {v1.8b}, [x1], x2 //// fifth row hence x5 = D1 + ld1 {v3.8b}, [x14], x2 //// sixth row hence x6 = D3 + urhadd v9.8b, v1.8b , v6.8b //// estimated row 4 = D9 + ld1 {v5.8b}, [x1], x2 //// seventh row hence x7 = D5 + urhadd v0.16b, v0.16b , v2.16b //// estimated row 1 = D0, row 5 = D1 + urhadd v1.16b, v1.16b , v3.16b //// estimated row 1 = D0, row 5 = D1 + ld1 {v7.8b}, [x14], x2 //// eighth row hence x8 = D7 + urhadd v2.16b, v2.16b , v4.16b //// estimated row 2 = D2, row 6 = D3 + urhadd v3.16b, v3.16b , v5.16b //// estimated row 2 = D2, row 6 = D3 + ld1 {v8.8b}, [x1], x2 //// ninth row hence x9 = D8 + urhadd v4.16b, v4.16b , v6.16b //// estimated row 3 = D4, row 7 = D5 + urhadd v5.16b, v5.16b , v7.16b //// estimated row 3 = D4, row 7 = D5 + + add x14, x0, x3 + lsl x3, x3, #1 + +///* Store the eight rows calculated above */ + st1 {v2.8b}, [x14], x3 //// second row hence D2 + urhadd v7.8b, v7.8b , v8.8b //// estimated row 8 = D7 + st1 {v0.8b}, [x0], x3 //// first row hence D0 + st1 {v9.8b}, [x14], x3 //// fourth row hence 
D9 + st1 {v4.8b}, [x0], x3 //// third row hence D4 + st1 {v3.8b}, [x14], x3 //// sixth row hence x6 = D3 + st1 {v1.8b}, [x0], x3 //// fifth row hence x5 = D1 + st1 {v7.8b}, [x14], x3 //// eighth row hence x8 = D7 + st1 {v5.8b}, [x0], x3 //// seventh row hence x7 = D5 + +// LDMFD sp!,{x12,pc} + pop_v_regs + ret + + + + + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_mc_halfx_fully_8x8_av8() +//// +//// Detail Description : This function pastes the reference block in the +//// current frame buffer.This function is called for +//// blocks that are not coded and have motion vectors +//// with a half pel resolutionand VopRoundingType is 0 .. +//// +//// Inputs : x0 - out : Current Block Pointer +//// x1 - ref : Refernce Block Pointer +//// x2 - ref_wid : Refernce Block Width +//// x3 - out_wid @ Current Block Width +//// +//// Registers Used : x12, x14, v0-v10, v12-v14, v16-v18, v20-v22 + +//// +//// Stack Usage : 64 bytes +//// +//// Outputs : The Motion Compensated Block +//// +//// Return Data : None +//// +//// Programming Note : <program limitation> +////----------------------------------------------------------------------------- +//*/ + + + +.global impeg2_mc_halfx_fully_8x8_av8 + + + +impeg2_mc_halfx_fully_8x8_av8: + + // STMFD sp!,{x12,x14} + push_v_regs + + add x14, x1, x2, lsl #2 + + add x12, x0, x3, lsl#2 + + ld1 {v0.8b, v1.8b}, [x1], x2 //load 16 pixels of row1 + + ld1 {v2.8b, v3.8b}, [x14], x2 // row5 + + + ld1 {v4.8b, v5.8b}, [x1], x2 //load 16 pixels row2 + + ld1 {v6.8b, v7.8b}, [x14], x2 //row6 + + + ext v8.8b, v0.8b , v1.8b , #1 + + ext v12.8b, v2.8b , v3.8b , #1 + + ext v16.8b, v4.8b , v5.8b , #1 + + ext v20.8b, v6.8b , v7.8b , #1 + + + ld1 {v9.8b, v10.8b}, [x1], x2 //load row3 + + ld1 {v13.8b, v14.8b}, [x14], x2 //load row7 + + ld1 {v17.8b, v18.8b}, [x1], x2 //load row4 + + ld1 {v21.8b, v22.8b}, [x14], x2 //load row8 + + + ext v1.8b, v9.8b , v10.8b , #1 + + ext v3.8b, v13.8b , 
v14.8b , #1 + + + + ext v5.8b, v17.8b , v18.8b , #1 + + ext v7.8b, v21.8b , v22.8b , #1 + + + urhadd v0.16b, v0.16b , v8.16b //operate on row1 and row3 + urhadd v1.16b, v1.16b , v9.16b //operate on row1 and row3 + + urhadd v2.16b, v2.16b , v12.16b //operate on row5 and row7 + urhadd v3.16b, v3.16b , v13.16b //operate on row5 and row7 + + + urhadd v4.16b, v4.16b , v16.16b //operate on row2 and row4 + urhadd v5.16b, v5.16b , v17.16b //operate on row2 and row4 + + + urhadd v6.16b, v6.16b , v20.16b //operate on row6 and row8 + urhadd v7.16b, v7.16b , v21.16b //operate on row6 and row8 + + st1 {v0.8b}, [x0], x3 //store row1 + + st1 {v2.8b}, [x12], x3 //store row5 + + st1 {v4.8b}, [x0], x3 //store row2 + + st1 {v6.8b}, [x12], x3 //store row6 + + st1 {v1.8b}, [x0], x3 //store row3 + + st1 {v3.8b}, [x12], x3 //store row7 + + st1 {v5.8b}, [x0], x3 //store row4 + + st1 {v7.8b}, [x12], x3 //store row8 + + + + // LDMFD sp!,{x12,pc} + pop_v_regs + ret + + + + + + + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_mc_halfx_halfy_8x8_av8() +//// +//// Detail Description : This function pastes the reference block in the +//// current frame buffer.This function is called for +//// blocks that are not coded and have motion vectors +//// with a half pel resolutionand VopRoundingType is 0 .. 
+//// +//// Inputs : x0 - out : Current Block Pointer +//// x1 - ref : Refernce Block Pointer +//// x2 - ref_wid : Refernce Block Width +//// x3 - out_wid @ Current Block Width +//// +//// Registers Used : x14, v0-v18, v22, v24, v26, v28, v30 + +//// +//// Stack Usage : 64 bytes +//// +//// Outputs : The Motion Compensated Block +//// +//// Return Data : None +//// +//// Programming Note : <program limitation> +////----------------------------------------------------------------------------- +//*/ + + +.global impeg2_mc_halfx_halfy_8x8_av8 + +impeg2_mc_halfx_halfy_8x8_av8: + + // STMFD sp!,{x12,x14} + push_v_regs + + add x14, x1, x2, lsl #2 + + ld1 {v0.8b, v1.8b}, [x1], x2 //load 16 pixels of row1 + + ld1 {v2.8b, v3.8b}, [x14], x2 // row5 + + ld1 {v4.8b, v5.8b}, [x1], x2 //load 16 pixels row2 + + ld1 {v6.8b, v7.8b}, [x14], x2 //row6 + + ext v1.8b, v0.8b , v1.8b , #1 + + + + ext v3.8b, v2.8b , v3.8b , #1 + + + + ext v5.8b, v4.8b , v5.8b , #1 + + ext v7.8b, v6.8b , v7.8b , #1 + + + + + ld1 {v8.8b, v9.8b}, [x1], x2 //load row3 + + + + ld1 {v10.8b, v11.8b}, [x14], x2 //load row7 + + ld1 {v12.8b, v13.8b}, [x1], x2 //load row4 + + ld1 {v14.8b, v15.8b}, [x14], x2 //load row8 + + ext v9.8b, v8.8b , v9.8b , #1 + + ld1 {v16.8b, v17.8b}, [x14], x2 //load row9 + + + + + + ext v11.8b, v10.8b , v11.8b , #1 + + + + ext v13.8b, v12.8b , v13.8b , #1 + + + + ext v15.8b, v14.8b , v15.8b , #1 + + ext v17.8b, v16.8b , v17.8b , #1 + + + //interpolation in x direction + + uaddl v0.8h, v0.8b, v1.8b //operate row1 + + uaddl v2.8h, v2.8b, v3.8b //operate row5 + + uaddl v4.8h, v4.8b, v5.8b //operate row2 + + uaddl v6.8h, v6.8b, v7.8b //operate row6 + + uaddl v8.8h, v8.8b, v9.8b //operate row3 + + uaddl v10.8h, v10.8b, v11.8b //operate row7 + + uaddl v12.8h, v12.8b, v13.8b //operate row4 + + uaddl v14.8h, v14.8b, v15.8b //operate row8 + + uaddl v16.8h, v16.8b, v17.8b //operate row9 + + //interpolation in y direction + + add x14, x0, x3, lsl #2 + + + + add v18.8h, v0.8h , v4.8h //operate row1 
and row2 + + add v26.8h, v2.8h , v6.8h //operate row5 and row6 + + add v20.8h, v4.8h , v8.8h //operate row2 and row3 + + add v28.8h, v6.8h , v10.8h //operate row6 and row7 + + rshrn v18.8b, v18.8h, #2 //row1 + + rshrn v26.8b, v26.8h, #2 //row5 + + rshrn v20.8b, v20.8h, #2 //row2 + + rshrn v28.8b, v28.8h, #2 //row6 + + add v22.8h, v8.8h , v12.8h //operate row3 and row4 + + st1 {v18.8b}, [x0], x3 //store row1 + + add v30.8h, v10.8h , v14.8h //operate row7 and row8 + + st1 {v26.8b}, [x14], x3 //store row5 + + add v24.8h, v12.8h , v2.8h //operate row4 and row5 + + st1 {v20.8b}, [x0], x3 //store row2 + + add v14.8h, v14.8h , v16.8h //operate row8 and row9 + + st1 {v28.8b}, [x14], x3 //store row6 + + + + rshrn v22.8b, v22.8h, #2 //row3 + + rshrn v30.8b, v30.8h, #2 //row7 + + rshrn v24.8b, v24.8h, #2 //row4 + + rshrn v14.8b, v14.8h, #2 //row8 + + + st1 {v22.8b}, [x0], x3 //store row3 + st1 {v30.8b}, [x14], x3 //store row7 + st1 {v24.8b}, [x0], x3 //store row4 + st1 {v14.8b}, [x14], x3 //store row8 + + + + // LDMFD sp!,{x12,pc} + pop_v_regs + ret + + + + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_mc_fullx_fully_8x8_av8() +//// +//// Detail Description : This function pastes the reference block in the +//// current frame buffer.This function is called for +//// blocks that are not coded and have motion vectors +//// with a half pel resolutionand .. 
////
//// Inputs             : x0 - out     : Current Block Pointer
////                      x1 - ref     : Reference Block Pointer
////                      x2 - ref_wid : Reference row stride in bytes (used as the
////                                     post-index increment of every load)
////                      x3 - out_wid : Current Block row stride in bytes (used as
////                                     the post-index increment of every store)
////
//// Registers Used     : x12, x14, v0-v3

////
//// Stack Usage        : 64 bytes (d8-d15 saved by push_v_regs)
////
//// Outputs            : The Motion Compensated Block
////
//// Return Data        : None
////
//// Programming Note   : Rows 1-4 are streamed through x1/x0 and rows 5-8
////                      through x14/x12, both pointer pairs advancing one
////                      row per access.
////-----------------------------------------------------------------------------
//*/


.global impeg2_mc_fullx_fully_8x8_av8
impeg2_mc_fullx_fully_8x8_av8:


    // STMFD sp!,{x12,x14}
    push_v_regs

    add             x14, x1, x2, lsl #2     //x14 = ref + 4 * ref_wid -> row5 of the reference

    add             x12, x0, x3, lsl #2     //x12 = out + 4 * out_wid -> row5 of the output


    ld1             {v0.8b}, [x1], x2       //load row1

    ld1             {v1.8b}, [x14], x2      //load row5

    ld1             {v2.8b}, [x1], x2       //load row2

    ld1             {v3.8b}, [x14], x2      //load row6


    st1             {v0.8b}, [x0], x3       //store row1

    st1             {v1.8b}, [x12], x3      //store row5

    st1             {v2.8b}, [x0], x3       //store row2

    st1             {v3.8b}, [x12], x3      //store row6


    ld1             {v0.8b}, [x1], x2       //load row3

    ld1             {v1.8b}, [x14], x2      //load row7

    ld1             {v2.8b}, [x1], x2       //load row4

    ld1             {v3.8b}, [x14], x2      //load row8


    st1             {v0.8b}, [x0], x3       //store row3

    st1             {v1.8b}, [x12], x3      //store row7

    st1             {v2.8b}, [x0], x3       //store row4

    st1             {v3.8b}, [x12], x3      //store row8


    // LDMFD sp!,{x12,pc}
    pop_v_regs
    ret




///*
////---------------------------------------------------------------------------
//// Function Name      : impeg2_interpolate_av8()
////
//// Detail Description : Averages two source buffers with rounding (urhadd)
////                      and writes the result to the destination, for one
////                      16x16 luma block followed by the 8x8 u and v blocks.
////
//// Inputs             : x0 - pointer to src1: three plane pointers are read
////                           from it at byte offsets 0 (y), 8 (u) and 16 (v)
////                      x1 - pointer to src2, same layout
////                      x2 - dest buf, same layout
////                      x3 - dst stride for luma; halved for the chroma planes
//// Registers Used     : x4, x5, x7, x12, v0-v15
////
//// Stack Usage        : 64 bytes
////
//// Outputs            : The Motion Compensated Block
////
//// Return Data        : None
////
//// Programming Note   : Both sources are read as packed rows (16 bytes per
////                      luma row, 8 bytes per chroma row); only the
////                      destination is addressed with a stride.
////-----------------------------------------------------------------------------
//*/


.global impeg2_interpolate_av8


+impeg2_interpolate_av8: + +//STMFD x13!,{x4-x7,x12,x14} + push_v_regs + + ldr x4, [x0, #0] //ptr_y src1 + + ldr x5, [x1, #0] //ptr_y src2 + + ldr x7, [x2, #0] //ptr_y dst buf + + mov x12, #4 //counter for number of blocks + + +interp_lumablocks_stride: + ld1 {v0.16b}, [x4], #16 //row1 src1 + + ld1 {v2.16b}, [x4], #16 //row2 src1 + + ld1 {v4.16b}, [x4], #16 //row3 src1 + + ld1 {v6.16b}, [x4], #16 //row4 src1 + + + ld1 {v8.16b}, [x5], #16 //row1 src2 + + ld1 {v10.16b}, [x5], #16 //row2 src2 + + ld1 {v12.16b}, [x5], #16 //row3 src2 + + ld1 {v14.16b}, [x5], #16 //row4 src2 + + urhadd v0.16b, v0.16b , v8.16b //operate on row1 + + urhadd v2.16b, v2.16b , v10.16b //operate on row2 + + urhadd v4.16b, v4.16b , v12.16b //operate on row3 + + urhadd v6.16b, v6.16b , v14.16b //operate on row4 + st1 {v0.16b}, [x7], x3 //row1 + + st1 {v2.16b}, [x7], x3 //row2 + + st1 {v4.16b}, [x7], x3 //row3 + + st1 {v6.16b}, [x7], x3 //row4 + + subs x12, x12, #1 + + bne interp_lumablocks_stride + + + lsr x3, x3, #1 //stride >> 1 + + ldr x4, [x0, #8] //ptr_u src1 + + ldr x5, [x1, #8] //ptr_u src2 + + ldr x7 , [x2, #8] //ptr_u dst buf + + mov x12, #2 //counter for number of blocks + + + +//chroma blocks + +interp_chromablocks_stride: + ld1 {v0.8b, v1.8b}, [x4], #16 //row1 & 2 src1 + + ld1 {v2.8b, v3.8b}, [x4], #16 //row3 & 4 src1 + + ld1 {v4.8b, v5.8b}, [x4], #16 //row5 & 6 src1 + + ld1 {v6.8b, v7.8b}, [x4], #16 //row7 & 8 src1 + + + ld1 {v8.8b, v9.8b}, [x5], #16 //row1 & 2 src2 + + ld1 {v10.8b, v11.8b}, [x5], #16 //row3 & 4 src2 + + ld1 {v12.8b, v13.8b}, [x5], #16 //row5 & 6 src2 + + ld1 {v14.8b, v15.8b}, [x5], #16 //row7 & 8 src2 + + urhadd v0.16b, v0.16b , v8.16b //operate on row1 & 2 + urhadd v1.16b, v1.16b , v9.16b //operate on row1 & 2 + + urhadd v2.16b, v2.16b , v10.16b //operate on row3 & 4 + urhadd v3.16b, v3.16b , v11.16b //operate on row3 & 4 + + urhadd v4.16b, v4.16b , v12.16b //operate on row5 & 6 + urhadd v5.16b, v5.16b , v13.16b //operate on row5 & 6 + + urhadd v6.16b, v6.16b , 
v14.16b //operate on row7 & 8 + urhadd v7.16b, v7.16b , v15.16b //operate on row7 & 8 + + st1 {v0.8b}, [x7], x3 //row1 + + st1 {v1.8b}, [x7], x3 //row2 + + st1 {v2.8b}, [x7], x3 //row3 + + st1 {v3.8b}, [x7], x3 //row4 + + st1 {v4.8b}, [x7], x3 //row5 + + st1 {v5.8b}, [x7], x3 //row6 + + st1 {v6.8b}, [x7], x3 //row7 + + st1 {v7.8b}, [x7], x3 //row8 + + + ldr x4, [x0, #16] //ptr_v src1 + + ldr x5, [x1, #16] //ptr_v src2 + + ldr x7, [x2, #16] //ptr_v dst buf + + subs x12, x12, #1 + + bne interp_chromablocks_stride + + + //LDMFD x13!,{x4-x7,x12,PC} + pop_v_regs + ret + + + + diff --git a/common/armv8/impeg2_mem_func.s b/common/armv8/impeg2_mem_func.s new file mode 100644 index 0000000..f0bb590 --- /dev/null +++ b/common/armv8/impeg2_mem_func.s @@ -0,0 +1,181 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ + +///* +////---------------------------------------------------------------------------- +//// File Name : mot_comp_neon.s +//// +//// Description : This file has motion compensation related +//// interpolation functions on Neon + CortexA-8 platform +//// +//// Reference Document : +//// +//// Revision History : +//// Date Author Detail Description +//// ------------ ---------------- ---------------------------------- +//// 18 jun 2010 S Hamsalekha Created +//// +////------------------------------------------------------------------------- +//*/ + +///* +//// ---------------------------------------------------------------------------- +//// Include Files +//// ---------------------------------------------------------------------------- +//*/ +// PRESERVE8 +.text +.include "impeg2_neon_macros.s" +///* +//// ---------------------------------------------------------------------------- +//// Struct/Union Types and Define +//// ---------------------------------------------------------------------------- +//*/ + + +///* +//// ---------------------------------------------------------------------------- +//// Static Global Data section variables +//// ---------------------------------------------------------------------------- +//*/ +//// -------------------------- NONE -------------------------------------------- + + +///* +//// ---------------------------------------------------------------------------- +//// Static Prototype Functions +//// ---------------------------------------------------------------------------- +//*/ +//// -------------------------- NONE -------------------------------------------- + +///* +//// ---------------------------------------------------------------------------- +//// Exported functions +//// ---------------------------------------------------------------------------- +//*/ + +///* +////--------------------------------------------------------------------------- +//// Function Name : impeg2_memset_8bit_8x8_block_av8() 
////
//// Detail Description : Fills an 8x8 block of bytes with a single value.
////                      Eight rows of 8 bytes are written; the destination
////                      pointer advances by the stride in x2 after each row.
////
//// Inputs             : x0 - Block Pointer
////                      w1 - Value with which the block is initialized
////                           (the low byte is replicated into every lane)
////                      x2 - Row stride in bytes of the destination
////
//// Registers Used     : v0
////
//// Stack Usage        : 64 bytes (d8-d15 saved by push_v_regs)
////
//// Outputs            : Block Matrix initialized to the given value
////
//// Return Data        : None
////
//// Programming Note   : NOTE(review): the original header stated both a
////                      64 bit and a 128 bit alignment assumption for the
////                      block; each store below writes 8 bytes - confirm the
////                      alignment actually required with the callers.
////-----------------------------------------------------------------------------
//*/
.global impeg2_memset_8bit_8x8_block_av8
impeg2_memset_8bit_8x8_block_av8:
    push_v_regs

//  ADD     x3,x0,#WIDTH_X_SIZE         @//x3 is another copy address offsetted

    dup             v0.8b, w1           ////replicate the low byte of w1 into all 8 lanes of v0

    st1             {v0.8b}, [x0], x2   ////Store the row 1
    st1             {v0.8b}, [x0], x2   ////Store the row 2
    st1             {v0.8b}, [x0], x2   ////Store the row 3
    st1             {v0.8b}, [x0], x2   ////Store the row 4
    st1             {v0.8b}, [x0], x2   ////Store the row 5
    st1             {v0.8b}, [x0], x2   ////Store the row 6
    st1             {v0.8b}, [x0], x2   ////Store the row 7
    st1             {v0.8b}, [x0], x2   ////Store the row 8

    pop_v_regs
    ret






///*
////---------------------------------------------------------------------------
//// Function Name      : impeg2_memset0_16bit_8x8_linear_block_av8()
////
//// Detail Description : Sets a linear (contiguous) 8x8 block of 16-bit
////                      residual values to 0, i.e. 8 rows of 16 bytes.
////
//// Inputs             : x0 - pointer to the 16-bit block to be cleared
////                      (x1 and x2 are not read by this routine)
//// Registers Used     : v0

////
//// Stack Usage        : 64 bytes
////
//// Outputs            : 128 bytes starting at x0 set to zero
////
//// Return Data        : None
////
//// Programming Note   : <program limitation>
////-----------------------------------------------------------------------------
//*/



.global impeg2_memset0_16bit_8x8_linear_block_av8


+impeg2_memset0_16bit_8x8_linear_block_av8: + + push_v_regs + + movi v0.8h, #0 + + //Y data + + st1 {v0.8h} , [x0], #16 //row1 + + st1 {v0.8h} , [x0], #16 //row2 + + st1 {v0.8h} , [x0], #16 //row3 + + st1 {v0.8h} , [x0], #16 //row4 + + st1 {v0.8h} , [x0], #16 //row5 + + st1 {v0.8h} , [x0], #16 //row6 + + st1 {v0.8h} , [x0], #16 //row7 + + st1 {v0.8h} , [x0], #16 //row8 + + + + pop_v_regs + ret + + + + diff --git a/common/armv8/impeg2_neon_macros.s b/common/armv8/impeg2_neon_macros.s new file mode 100644 index 0000000..452ba45 --- /dev/null +++ b/common/armv8/impeg2_neon_macros.s @@ -0,0 +1,58 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//******************************************************************************* +//* @file +//* impeg2_neon_macros.s +//* +//* @brief +//* Contains assembly macros +//* +//* @author +//* Naveen SR +//* +//* @par List of Functions: +//* +//* +//* @remarks +//* None +//* +//******************************************************************************* + + +.macro push_v_regs + stp d8, d9, [sp, #-16]! + stp d10, d11, [sp, #-16]! + stp d12, d13, [sp, #-16]! + stp d14, d15, [sp, #-16]! 
+.endm +.macro pop_v_regs + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 +.endm + +.macro swp reg1, reg2 + eor \reg1, \reg1, \reg2 + eor \reg2, \reg1, \reg2 + eor \reg1, \reg1, \reg2 +.endm + diff --git a/common/armv8/impeg2_platform_macros.h b/common/armv8/impeg2_platform_macros.h new file mode 100644 index 0000000..ff31034 --- /dev/null +++ b/common/armv8/impeg2_platform_macros.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2_PLATFORM_MACROS_H__ +#define __IMPEG2_PLATFORM_MACROS_H__ + +#define CONV_LE_TO_BE(u4_temp2,u4_temp1) u4_temp2 = \ + (u4_temp1 << 24) | \ + ((u4_temp1 & 0xff00) << 8) | \ + ((u4_temp1 & 0xff0000) >> 8) | \ + (u4_temp1 >> 24); + +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return (__builtin_clz(u4_word)); + else + return 32; +} + +#define CLIP_U8(x) ((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)) +#define CLIP_S8(x) ((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)) + +#define CLIP_U12(x) ((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)) +#define CLIP_S12(x) ((x) > 2047) ? (2047) : (((x) < -2048) ? 
(-2048) : (x)) + +#define CLIP_U16(x) ((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)) +#define CLIP_S16(x) ((x) > 65535) ? (65535) : (((x) < -65536) ? (-65536) : (x)) + +#define INLINE +#define PLD(x) __pld(x) + +#endif /* __IMPEG2_PLATFORM_MACROS_H__ */ diff --git a/common/impeg2_buf_mgr.c b/common/impeg2_buf_mgr.c new file mode 100644 index 0000000..c4aca4a --- /dev/null +++ b/common/impeg2_buf_mgr.c @@ -0,0 +1,411 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* impeg2_buf_mgr.c +* +* @brief +* Contains function definitions for buffer management +* +* @author +* Srinivas T +* +* @par List of Functions: +* - impeg2_buf_mgr_init() +* - impeg2_buf_mgr_add() +* - impeg2_buf_mgr_get_next_free() +* - impeg2_buf_mgr_check_free() +* - impeg2_buf_mgr_release() +* - impeg2_buf_mgr_set_status() +* - impeg2_buf_mgr_get_status() +* - impeg2_buf_mgr_get_buf() +* - impeg2_buf_mgr_get_num_active_buf() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include <stdio.h> +#include <stdlib.h> +#include "iv_datatypedef.h" +#include "impeg2_defs.h" +#include "impeg2_buf_mgr.h" + + + +/** +******************************************************************************* +* +* @brief +* Buffer manager initialization function. +* +* @par Description: +* Initializes the buffer manager structure +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns +* +* @remarks +* None +* +******************************************************************************* +*/ + +void impeg2_buf_mgr_init( + buf_mgr_t *ps_buf_mgr) +{ + WORD32 id; + + ps_buf_mgr->u4_max_buf_cnt = BUF_MGR_MAX_CNT; + ps_buf_mgr->u4_active_buf_cnt = 0; + + for(id = 0; id < BUF_MGR_MAX_CNT; id++) + { + ps_buf_mgr->au4_status[id] = 0; + ps_buf_mgr->apv_ptr[id] = NULL; + } +} + + +/** +******************************************************************************* +* +* @brief +* Adds and increments the buffer and buffer count. 
+* +* @par Description: +* Adds a buffer to the buffer manager if it is not already present and +* increments the active buffer count +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] pv_ptr +* Pointer to the buffer to be added +* +* @returns Returns 0 on success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 impeg2_buf_mgr_add( + buf_mgr_t *ps_buf_mgr, + void *pv_ptr, + WORD32 i4_buf_id) +{ + + /* Check if buffer ID is within allowed range */ + if(i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt) + { + return (-1); + } + + /* Check if the current ID is being used to hold some other buffer */ + if((ps_buf_mgr->apv_ptr[i4_buf_id] != NULL) && + (ps_buf_mgr->apv_ptr[i4_buf_id] != pv_ptr)) + { + return (-1); + } + ps_buf_mgr->apv_ptr[i4_buf_id] = pv_ptr; + + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the next free buffer. 
+* +* @par Description: +* Returns the next free buffer available and sets the corresponding status +* to DEC +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] pi4_buf_id +* Pointer to the id of the free buffer +* +* @returns Pointer to the free buffer +* +* @remarks +* None +* +******************************************************************************* +*/ +void* impeg2_buf_mgr_get_next_free( + buf_mgr_t *ps_buf_mgr, + WORD32 *pi4_buf_id) +{ + WORD32 id; + void *pv_ret_ptr; + + pv_ret_ptr = NULL; + for(id = 0; id < (WORD32)ps_buf_mgr->u4_max_buf_cnt; id++) + { + /* Check if the buffer is non-null and status is zero */ + if((ps_buf_mgr->au4_status[id] == 0) && (ps_buf_mgr->apv_ptr[id])) + { + *pi4_buf_id = id; + /* DEC is set to 1 */ + ps_buf_mgr->au4_status[id] = 1; + pv_ret_ptr = ps_buf_mgr->apv_ptr[id]; + break; + } + } + + return pv_ret_ptr; +} + + +/** +******************************************************************************* +* +* @brief +* Checks the buffer manager for free buffers available. +* +* @par Description: +* Checks if there are any free buffers available +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns Returns 0 if available, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 impeg2_buf_mgr_check_free( + buf_mgr_t *ps_buf_mgr) +{ + UWORD32 id; + + for(id = 0; id < ps_buf_mgr->u4_max_buf_cnt; id++) + { + if((ps_buf_mgr->au4_status[id] == 0) && + (ps_buf_mgr->apv_ptr[id])) + { + return 1; + } + } + + return 0; + +} + + +/** +******************************************************************************* +* +* @brief +* Resets the status bits. 
+* +* @par Description: +* resets the status bits that the mask contains (status corresponding to +* the id) +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer status to be released +* +* @param[in] mask +* Contains the bits that are to be reset +* +* @returns 0 if success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 impeg2_buf_mgr_release( + buf_mgr_t *ps_buf_mgr, + WORD32 i4_buf_id, + UWORD32 u4_mask) +{ + /* If the given id is pointing to an id which is not yet added */ + if(i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt) + { + return (-1); + } + + if(0 == (ps_buf_mgr->au4_status[i4_buf_id] & u4_mask)) + { + return (-1); + } + + ps_buf_mgr->au4_status[i4_buf_id] &= ~u4_mask; + + /* If both the REF and DISP are zero, DEC is set to zero */ + if(ps_buf_mgr->au4_status[i4_buf_id] == 1) + { + ps_buf_mgr->au4_status[i4_buf_id] = 0; + } + + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Sets the status bit. 
+* +* @par Description: +* sets the status bits that the mask contains (status corresponding to the +* id) +* +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer whose status needs to be modified +* +* +* @param[in] mask +* Contains the bits that are to be set +* +* @returns 0 if success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 impeg2_buf_mgr_set_status( + buf_mgr_t *ps_buf_mgr, + WORD32 i4_buf_id, + UWORD32 u4_mask) +{ + if(i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt) + { + return (-1); + } + + + if((ps_buf_mgr->au4_status[i4_buf_id] & u4_mask) != 0) + { + return (-1); + } + + ps_buf_mgr->au4_status[i4_buf_id] |= u4_mask; + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Returns the status of the buffer. +* +* @par Description: +* Returns the status of the buffer corresponding to the id +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer status required +* +* @returns Status of the buffer corresponding to the id +* +* @remarks +* None +* +******************************************************************************* +*/ +UWORD32 impeg2_buf_mgr_get_status( + buf_mgr_t *ps_buf_mgr, + WORD32 i4_buf_id) +{ + return ps_buf_mgr->au4_status[i4_buf_id]; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the buffer from the buffer manager +* +* @par Description: +* Returns the pointer to the buffer corresponding to the id +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @param[in] buf_id +* ID of the buffer required +* +* @returns Pointer to the buffer required +* +* @remarks +* None +* +******************************************************************************* +*/ +void* impeg2_buf_mgr_get_buf( + buf_mgr_t *ps_buf_mgr, 
+ WORD32 i4_buf_id) +{ + return ps_buf_mgr->apv_ptr[i4_buf_id]; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the no.of active buffer +* +* @par Description: +* Return the number of active buffers in the buffer manager +* +* @param[in] ps_buf_mgr +* Pointer to the buffer manager +* +* @returns number of active buffers +* +* @remarks +* None +* +******************************************************************************* +*/ +UWORD32 impeg2_buf_mgr_get_num_active_buf( + buf_mgr_t *ps_buf_mgr) +{ + return ps_buf_mgr->u4_max_buf_cnt; +} diff --git a/common/impeg2_buf_mgr.h b/common/impeg2_buf_mgr.h new file mode 100644 index 0000000..6b1cbef --- /dev/null +++ b/common/impeg2_buf_mgr.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2_buf_mgr.h +* +* @brief +* Function declarations used for buffer management +* +* @author +* Srinivas T +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IMPEG2_BUF_MGR_H_ +#define _IMPEG2_BUF_MGR_H_ + +#define BUF_MGR_MAX_CNT 64 + +#define BUF_MGR_DEC 1 +#define BUF_MGR_REF (1 << 1) +#define BUF_MGR_DISP (1 << 2) + +typedef struct +{ + /** + * max_buf_cnt + */ + UWORD32 u4_max_buf_cnt; + + /** + * active_buf_cnt + */ + UWORD32 u4_active_buf_cnt; + /** + * au4_status[BUF_MGR_MAX_CNT] + */ + UWORD32 au4_status[BUF_MGR_MAX_CNT]; + /* The last three bit of status are: */ + /* Bit 0 - DEC */ + /* Bit 1 - REF */ + /* Bit 2 - DISP */ + + void *apv_ptr[BUF_MGR_MAX_CNT]; +}buf_mgr_t; + +// intializes the buffer API structure +void impeg2_buf_mgr_init( + buf_mgr_t *ps_buf_mgr); + +// Add buffer to buffer manager. 
0: success, -1: fail (u4_active_buf_cnt has reached u4_max_buf_cnt) +WORD32 impeg2_buf_mgr_add( + buf_mgr_t *ps_buf_mgr, + void *pv_ptr, + WORD32 buf_id); + +// this function will set the buffer status to DEC +void* impeg2_buf_mgr_get_next_free( + buf_mgr_t *ps_buf_mgr, + WORD32 *pi4_id); + +// this function will check if there are any free buffers +WORD32 impeg2_buf_mgr_check_free( + buf_mgr_t *ps_buf_mgr); + +// mask will have who released it: DISP:REF:DEC +WORD32 impeg2_buf_mgr_release( + buf_mgr_t *ps_buf_mgr, + WORD32 id, + UWORD32 mask); + +// sets the status to one or all of DISP:REF:DEC +WORD32 impeg2_buf_mgr_set_status( + buf_mgr_t *ps_buf_mgr, + WORD32 id, + UWORD32 mask); + +// Gets status of the buffer +UWORD32 impeg2_buf_mgr_get_status( + buf_mgr_t *ps_buf_mgr, + WORD32 id); + +// pass the ID - buffer will be returned +void* impeg2_buf_mgr_get_buf( + buf_mgr_t *ps_buf_mgr, + WORD32 id); + +// will return number of active buffers +UWORD32 impeg2_buf_mgr_get_num_active_buf( + buf_mgr_t *ps_buf_mgr); + + + +#endif //_IMPEG2_BUF_MGR_H_ diff --git a/common/impeg2_defs.h b/common/impeg2_defs.h new file mode 100644 index 0000000..f1523f2 --- /dev/null +++ b/common/impeg2_defs.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef __IMPEG2_DEFS_H__ +#define __IMPEG2_DEFS_H__ + +#include <assert.h> + +/* Decoder needs at least 4 reference buffers in order to support format conversion in a thread and +to support B pictures. Because of format conversion in a thread, codec delay is now 2 frames instead of 1. +To reduce this delay, format conversion has to wait for MB status before converting for B pictures. +To avoid this check the delay is increased to 2 and hence number of reference frames minimum is 4 */ +#define NUM_INT_FRAME_BUFFERS 4 + + +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 2160 + +#define MIN_WIDTH 16 +#define MIN_HEIGHT 16 + + +#define MAX_FRM_SIZE (MAX_WIDTH * MAX_HEIGHT * 2) /* Supports only 420P and 422ILE */ + +#define DEC_ORDER 0 + +#define MAX_BITSTREAM_BUFFER_SIZE 2000 * 1024 + + +/****************************************************************************** +* MPEG2 Start code and other code definitions +*******************************************************************************/ +#define START_CODE_PREFIX 0x000001 +#define SEQUENCE_HEADER_CODE 0x000001B3 +#define EXTENSION_START_CODE 0x000001B5 +#define USER_DATA_START_CODE 0x000001B2 +#define GOP_START_CODE 0x000001B8 +#define PICTURE_START_CODE 0x00000100 +#define SEQUENCE_END_CODE 0x000001B7 +#define RESERVED_START_CODE 0x000001B0 +#define MB_ESCAPE_CODE 0x008 + +/****************************************************************************** +* MPEG2 Length of various codes definitions +*******************************************************************************/ +#define START_CODE_LEN 32 +#define START_CODE_PREFIX_LEN 24 +#define MB_ESCAPE_CODE_LEN 11 +#define EXT_ID_LEN 4 +#define MB_QUANT_SCALE_CODE_LEN 5 +#define MB_DCT_TYPE_LEN 1 +#define MB_MOTION_TYPE_LEN 2 +#define BYTE_LEN 8 + 
/******************************************************************************
* MPEG1 code definitions
*******************************************************************************/
#define MB_STUFFING_CODE                0x00F

/******************************************************************************
* MPEG1 Length of various codes definitions
*******************************************************************************/
#define MB_STUFFING_CODE_LEN            11

/******************************************************************************
* MPEG2 MB definitions
*******************************************************************************/
#define MPEG2_INTRA_MB                  0x04
#define MPEG2_INTRAQ_MB                 0x44
#define MPEG2_INTER_MB                  0x28
#define MB_MOTION_BIDIRECT              0x30
#define MB_INTRA_OR_PATTERN             0x0C

/******************************************************************************
* Tools definitions
*******************************************************************************/
#define SPATIAL_SCALABILITY             0x01
#define TEMPORAL_SCALABILITY            0x03

/******************************************************************************
* Extension IDs definitions
*******************************************************************************/
#define SEQ_DISPLAY_EXT_ID              0x02
#define SEQ_SCALABLE_EXT_ID             0x05
#define QUANT_MATRIX_EXT_ID             0x03
#define COPYRIGHT_EXT_ID                0x04
#define PIC_DISPLAY_EXT_ID              0x07
#define PIC_SPATIAL_SCALABLE_EXT_ID     0x09
#define PIC_TEMPORAL_SCALABLE_EXT_ID    0x0A
#define CAMERA_PARAM_EXT_ID             0x0B
#define ITU_T_EXT_ID                    0x0C
/******************************************************************************
* Extension IDs Length definitions
*******************************************************************************/
#define CAMERA_PARAMETER_EXTENSION_LEN  377
#define COPYRIGHT_EXTENSION_LEN         88
#define GROUP_OF_PICTURE_LEN            59


/******************************************************************************
* MPEG2 Picture structure definitions
*******************************************************************************/
#define TOP_FIELD                       1
#define BOTTOM_FIELD                    2
#define FRAME_PICTURE                   3

/******************************************************************************
* MPEG2 Profile definitions
*******************************************************************************/
#define MPEG2_SIMPLE_PROFILE            0x05
#define MPEG2_MAIN_PROFILE              0x04

/******************************************************************************
* MPEG2 Level definitions
*******************************************************************************/
#define MPEG2_LOW_LEVEL                 0x0a
#define MPEG2_MAIN_LEVEL                0x08

/******************************************************************************
* MPEG2 Prediction types
*******************************************************************************/
#define FIELD_PRED                      0
#define FRAME_PRED                      1
#define DUAL_PRED                       2
/* Negative replacement lists are parenthesized so that they always expand  */
/* as one operand                                                           */
#define RESERVED                        (-1)
#define MC_16X8_PRED                    3

/*****************************************************************************
* MPEG2 Motion vector format
******************************************************************************/
#define FIELD_MV                        0
#define FRAME_MV                        1

/******************************************************************************/
/* General Video related definitions                                          */
/******************************************************************************/

#define BLK_SIZE                        8
#define NUM_COEFFS                      ((BLK_SIZE)*(BLK_SIZE))
#define LUMA_BLK_SIZE                   (2 * (BLK_SIZE))
#define CHROMA_BLK_SIZE                 (BLK_SIZE)
#define BLOCKS_IN_MB                    6
#define MB_SIZE                         16
#define MB_CHROMA_SIZE                  8
#define NUM_PELS_IN_BLOCK               64
#define NUM_LUMA_BLKS                   4
#define NUM_CHROMA_BLKS                 2
#define MAX_COLR_COMPS                  3
#define Y_LUMA                          0
#define U_CHROMA                        1
#define V_CHROMA                        2
#define MB_LUMA_MEM_SIZE                ((MB_SIZE) * (MB_SIZE))
#define MB_CHROMA_MEM_SIZE              (((MB_SIZE) / 2) * ((MB_SIZE) / 2))

#define BITS_IN_INT                     32
+/******************************************************************************/ +/* MPEG2 Motion compensation related definitions */ +/******************************************************************************/ +#define REF_FRM_MB_WIDTH 18 +#define REF_FRM_MB_HEIGHT 18 +#define REF_FLD_MB_HEIGHT 10 +#define REF_FLD_MB_WIDTH 18 + +/******************************************************************************/ +/* Maximum number of bits per MB */ +/******************************************************************************/ +#define I_MB_BIT_SIZE 90 +#define P_MB_BIT_SIZE 90 +#define B_MB_BIT_SIZE 150 + +/******************************************************************************/ +/* Aspect ratio related definitions */ +/******************************************************************************/ +#define MPG1_NTSC_4_3 0x8 +#define MPG1_PAL_4_3 0xc +#define MPG1_NTSC_16_9 0x6 +#define MPG1_PAL_16_9 0x3 +#define MPG1_1_1 0x1 + +#define MPG2_4_3 0x2 +#define MPG2_16_9 0x3 +#define MPG2_1_1 0x1 + +/******************************************************************************/ +/* Inverse Quantizer Output range */ +/******************************************************************************/ +#define IQ_OUTPUT_MAX 2047 +#define IQ_OUTPUT_MIN -2048 + +/******************************************************************************/ +/* IDCT Output range */ +/******************************************************************************/ +#define IDCT_OUTPUT_MAX 255 +#define IDCT_OUTPUT_MIN -256 + +/******************************************************************************/ +/* Output pixel range */ +/******************************************************************************/ +#define PEL_VALUE_MAX 255 +#define PEL_VALUE_MIN 0 + +/******************************************************************************/ +/* inv scan types */ +/******************************************************************************/ +#define ZIG_ZAG_SCAN 0 +#define 
VERTICAL_SCAN 1 + +/******************************************************************************/ +/* Related VLD codes */ +/******************************************************************************/ +#define ESC_CODE_VALUE 0x0058 +#define EOB_CODE_VALUE 0x07d0 + +#define END_OF_BLOCK 0x01 +#define ESCAPE_CODE 0x06 + +#define END_OF_BLOCK_ZERO 0x01ff +#define END_OF_BLOCK_ONE 0x01ff + +/******************** Idct Specific ***************/ +#define TRANS_SIZE_8 8 +#define IDCT_STG1_SHIFT 12 +#define IDCT_STG2_SHIFT 16 + +#define IDCT_STG1_ROUND ((1 << IDCT_STG1_SHIFT) >> 1) +#define IDCT_STG2_ROUND ((1 << IDCT_STG2_SHIFT) >> 1) + + +/****************************************************************************** +* Sample Version Definitions +*******************************************************************************/ +#define SAMPLE_VERS_MAX_FRAMES_DECODE 999 + +#define MAX_FRAME_BUFFER 7 + +/* vop coding type */ +typedef enum +{ + I_PIC = 1, + P_PIC, + B_PIC, + D_PIC +} e_pic_type_t; + +typedef enum +{ + MPEG_2_VIDEO, + MPEG_1_VIDEO +} e_video_type_t; + +typedef enum +{ + FORW, + BACK, + BIDIRECT +} e_pred_direction_t; + +typedef enum +{ + TOP, + BOTTOM +} e_field_t; + +/* Motion vectors (first/second) */ +enum +{ + FIRST, + SECOND, + THIRD, + FOURTH +}; + +enum +{ + MV_X, + MV_Y +}; + +/* Enumeration defining the various kinds of interpolation possible in +motion compensation */ +typedef enum +{ + FULL_XFULL_Y, + FULL_XHALF_Y, + HALF_XFULL_Y, + HALF_XHALF_Y +} e_sample_type_t; +typedef enum +{ + /* Params of the reference buffer used as input to MC */ + /* frame prediction in P frame picture */ + MC_FRM_FW_OR_BK_1MV, + /* field prediction in P frame picture */ + MC_FRM_FW_OR_BK_2MV, + /* frame prediction in B frame picture */ + MC_FRM_FW_AND_BK_2MV, + /* field prediction in B frame picture */ + MC_FRM_FW_AND_BK_4MV, + /* dual prime prediction in P frame picture */ + MC_FRM_FW_DUAL_PRIME_1MV, + /* frame prediction in P field picture */ + 
MC_FLD_FW_OR_BK_1MV, + /* 16x8 prediction in P field picture */ + MC_FLD_FW_OR_BK_2MV, + /* field prediction in B field picture */ + MC_FLD_FW_AND_BK_2MV, + /* 16x8 prediction in B field picture */ + MC_FLD_FW_AND_BK_4MV, + /* dual prime prediction in P field picture */ + MC_FLD_FW_DUAL_PRIME_1MV, +} e_mb_type_t; + +#endif /* __IMPEG2_DEFS_H__ */ + diff --git a/common/impeg2_disp_mgr.c b/common/impeg2_disp_mgr.c new file mode 100644 index 0000000..f5ede84 --- /dev/null +++ b/common/impeg2_disp_mgr.c @@ -0,0 +1,172 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2_disp_mgr.c +* +* @brief +* Contains function definitions for display management +* +* @author +* Srinivas T +* +* @par List of Functions: +* - impeg2_disp_mgr_init() +* - impeg2_disp_mgr_add() +* - impeg2_disp_mgr_get() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include <stdio.h> +#include <stdlib.h> +#include "iv_datatypedef.h" +#include "impeg2_defs.h" +#include "impeg2_disp_mgr.h" + +/** +******************************************************************************* +* +* @brief +* Initialization function for display buffer manager +* +* @par Description: +* Initializes the display buffer management structure +* +* @param[in] ps_disp_mgr +* Pointer to the display buffer management structure +* +* @returns none +* +* @remarks +* None +* +******************************************************************************* +*/ +void impeg2_disp_mgr_init( + disp_mgr_t *ps_disp_mgr) +{ + WORD32 id; + + + for(id = 0; id < DISP_MGR_MAX_CNT; id++) + { + ps_disp_mgr->apv_ptr[id] = NULL; + } + + ps_disp_mgr->i4_wr_idx = 0; + ps_disp_mgr->i4_rd_idx = 0; +} + + +/** +******************************************************************************* +* +* @brief +* Adds a buffer to the display manager +* +* @par Description: +* Adds a buffer to the display buffer manager +* +* @param[in] ps_disp_mgr +* Pointer to the diaplay buffer management structure +* +* @param[in] buf_id +* ID of the display buffer +* +* @param[in] abs_poc +* Absolute POC of the display buffer +* +* @param[in] pv_ptr +* Pointer to the display buffer +* +* @returns 0 if success, -1 otherwise +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 impeg2_disp_mgr_add(disp_mgr_t *ps_disp_mgr, + void *pv_ptr, + WORD32 i4_buf_id) +{ + + + WORD32 id; + id = 
ps_disp_mgr->i4_wr_idx % DISP_MGR_MAX_CNT; + + ps_disp_mgr->apv_ptr[id] = pv_ptr; + ps_disp_mgr->ai4_buf_id[id] = i4_buf_id; + ps_disp_mgr->i4_wr_idx++; + + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Gets the next buffer +* +* @par Description: +* Gets the next display buffer +* +* @param[in] ps_disp_mgr +* Pointer to the display buffer structure +* +* @param[out] pi4_buf_id +* Pointer to hold buffer id of the display buffer being returned +* +* @returns Pointer to the next display buffer +* +* @remarks +* None +* +******************************************************************************* +*/ +void* impeg2_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id) +{ + WORD32 id; + + *pi4_buf_id = -1; + + if(ps_disp_mgr->i4_rd_idx < ps_disp_mgr->i4_wr_idx) + { + id = ps_disp_mgr->i4_rd_idx % DISP_MGR_MAX_CNT; + if(NULL == ps_disp_mgr->apv_ptr[id]) + { + return NULL; + } + + *pi4_buf_id = ps_disp_mgr->ai4_buf_id[id]; + + ps_disp_mgr->i4_rd_idx++; + + return ps_disp_mgr->apv_ptr[id]; + } + else + return NULL; + +} diff --git a/common/impeg2_disp_mgr.h b/common/impeg2_disp_mgr.h new file mode 100644 index 0000000..96b01b0 --- /dev/null +++ b/common/impeg2_disp_mgr.h @@ -0,0 +1,67 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2_disp_mgr.h +* +* @brief +* Function declarations used for display management +* +* @author +* Srinivas T +* +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _IMPEG2_DISP_MGR_H_ +#define _IMPEG2_DISP_MGR_H_ + +#define DISP_MGR_MAX_CNT 64 +#define DEFAULT_POC 0x7FFFFFFF + +typedef struct +{ + /** + * apv_ptr[DISP_MGR_MAX_CNT] + */ + void *apv_ptr[DISP_MGR_MAX_CNT]; + + WORD32 ai4_buf_id[DISP_MGR_MAX_CNT]; + + WORD32 i4_wr_idx; + + WORD32 i4_rd_idx; +}disp_mgr_t; + +void impeg2_disp_mgr_init( + disp_mgr_t *ps_disp_mgr); + +WORD32 impeg2_disp_mgr_add( + disp_mgr_t *ps_disp_mgr, + void *pv_ptr, + WORD32 i4_buf_id); + +void* impeg2_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id); + +#endif //_IMPEG2_DISP_MGR_H_ diff --git a/common/impeg2_format_conv.c b/common/impeg2_format_conv.c new file mode 100644 index 0000000..ec0bcfb --- /dev/null +++ b/common/impeg2_format_conv.c @@ -0,0 +1,401 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : impeg2_format_conv .c */ +/* */ +/* Description : Contains functions needed to convert the images in */ +/* different color spaces to yuv 422i color space */ +/* */ +/* List of Functions : YUV420toYUV420() */ +/* YUV420toYUV422I() */ +/* YUV420toYUV420SP_VU() */ +/* YUV420toYUV420SP_UU() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 28 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ + +/* User include files */ +#include <stdio.h> +#include <string.h> +#include "iv_datatypedef.h" +#include "iv.h" +#include "ithread.h" + +#include "iv_datatypedef.h" +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" + +#include "impeg2_job_queue.h" +#include "impeg2_format_conv.h" + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_copy_frm_yuv420p() */ +/* */ +/* Description : This function performs conversion from YUV420 to */ +/* YUV422I color space. */ +/* */ +/* Inputs : pu1_src_y, - UWORD8 pointer to source y plane. */ +/* pu1_src_u, - UWORD8 pointer to source u plane. */ +/* pu1_src_v, - UWORD8 pointer to source v plane. */ +/* pu1_dst_y, - UWORD8 pointer to dest y plane. */ +/* pu1_dst_u, - UWORD8 pointer to dest u plane. 
*/ +/* pu1_dst_v, - UWORD8 pointer to dest v plane. */ +/* u4_width, - Width of image. */ +/* u4_height, - Height of image. */ +/* u4_src_stride_y - Stride in pixels of source Y plane. */ +/* u4_src_stride_u - Stride in pixels of source U plane. */ +/* u4_src_stride_v - Stride in pixels of source V plane. */ +/* u4_dst_stride_y - Stride in pixels of dest Y plane. */ +/* u4_dst_stride_u - Stride in pixels of dest U plane. */ +/* u4_dst_stride_v - Stride in pixels of dest V plane. */ +/* */ +/* Globals : None */ +/* */ +/* Processing : One row is processed at a time. The one iteration of the */ +/* code will rearrange pixels into YUV422 interleaved */ +/* format. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ +void impeg2_copy_frm_yuv420p(UWORD8 *pu1_src_y, + UWORD8 *pu1_src_u, + UWORD8 *pu1_src_v, + UWORD8 *pu1_dst_y, + UWORD8 *pu1_dst_u, + UWORD8 *pu1_dst_v, + UWORD32 u4_width, + UWORD32 u4_height, + UWORD32 u4_src_stride_y, + UWORD32 u4_src_stride_u, + UWORD32 u4_src_stride_v, + UWORD32 u4_dst_stride_y, + UWORD32 u4_dst_stride_u, + UWORD32 u4_dst_stride_v) +{ + WORD32 i4_cnt; + WORD32 i4_y_height = (WORD32) u4_height; + WORD32 i4_uv_height = u4_height >> 1; + WORD32 i4_uv_width = u4_width >> 1; + + for(i4_cnt = 0; i4_cnt < i4_y_height; i4_cnt++) + { + memcpy(pu1_dst_y, pu1_src_y, u4_width); + pu1_dst_y += (u4_dst_stride_y); + pu1_src_y += (u4_src_stride_y); + } + + for(i4_cnt = 0; i4_cnt < i4_uv_height; i4_cnt++) + { + memcpy(pu1_dst_u, pu1_src_u, i4_uv_width); + pu1_dst_u += (u4_dst_stride_u); + pu1_src_u += (u4_src_stride_u); + + } + + for(i4_cnt = 0; i4_cnt < i4_uv_height; i4_cnt++) + { + memcpy(pu1_dst_v, pu1_src_v, i4_uv_width); + pu1_dst_v += (u4_dst_stride_v); + pu1_src_v += 
(u4_src_stride_v); + + } + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_fmt_conv_yuv420p_to_yuv422ile() */ +/* */ +/* Description : This function performs conversion from YUV420 to */ +/* YUV422I color space. */ +/* */ +/* Inputs : pu1_y - UWORD8 pointer to y plane. */ +/* pu1_u - UWORD8 pointer to u plane. */ +/* pu1_v - UWORD8 pointer to u plane. */ +/* pu2_yuv422i - UWORD16 pointer to yuv422iimage. */ +/* u4_width - Width of the Y plane. */ +/* u4_height - Height of the Y plane. */ +/* u4_stride_y - Stride in pixels of Y plane. */ +/* u4_stride_u - Stride in pixels of U plane. */ +/* u4_stride_v - Stride in pixels of V plane. */ +/* u4_stride_yuv422i- Stride in pixels of yuv422i image. */ +/* */ +/* Globals : None */ +/* */ +/* Processing : One row is processed at a time. The one iteration of the */ +/* code will rearrange pixels into YUV422 interleaved */ +/* format. */ +/* */ +/* Outputs : None */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 29 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ + +void impeg2_fmt_conv_yuv420p_to_yuv422ile(register UWORD8 *pu1_y, + register UWORD8 *pu1_u, + register UWORD8 *pu1_v, + void *pv_yuv422i, + UWORD32 u4_width, + UWORD32 u4_height, + UWORD32 u4_stride_y, + UWORD32 u4_stride_u, + UWORD32 u4_stride_v, + UWORD32 u4_stride_yuv422i) +{ + /* Declare local variables */ + register WORD16 i,j; + register UWORD16 u2_offset1,u2_offset2,u2_offset3,u2_offset_yuv422i; + register UWORD8 u1_y1,u1_uv; + register UWORD32 u4_pixel; + register UWORD16 u2_width_cnt; + register UWORD32 *pu4_yuv422i; + + UWORD8 u1_flag; /* This flag is used to indicate wether the row is even or odd */ + + u1_flag=0x0; /* Intialize it with 0 indicating odd row */ + + /* Calculate the offsets necessary 
to make input and output buffers to point next row */ + u2_offset1 = u4_stride_y - u4_width; + u2_offset2 = u4_stride_u - ((u4_width + 1) >> 1); + u2_offset3 = u4_stride_v - ((u4_width + 1) >> 1); + u2_offset_yuv422i = (u4_stride_yuv422i >> 1) -((u4_width + 1) >> 1); + + /* Type cast the output pointer to UWORD32 */ + pu4_yuv422i = (UWORD32 *)pv_yuv422i; + + /* Calculate the loop counter for inner loop */ + u2_width_cnt = u4_width >> 1; + + /* Run the loop for height of input buffer */ + for(i = u4_height; i > 0; i--) + { + /* Run the loop for width/2 */ + for(j = u2_width_cnt; j > 0; j--) + { + /* Store the value in output buffer in the order U0Y0V0Y1U2Y2V2Y3.... */ + /* Load Y0 */ + u1_y1 = *pu1_y++; + /* Load Y1 */ + u4_pixel = *pu1_y++; + /* Load V0 */ + u1_uv = *pu1_v++; + u4_pixel = (u4_pixel << 8) + u1_uv; + /* Load U0 */ + u1_uv = *pu1_u++; + u4_pixel = (u4_pixel << 8) + u1_y1; + u4_pixel = (u4_pixel << 8) + u1_uv; + *pu4_yuv422i++ = u4_pixel; + } + /* Incase of width is odd number take care of last pixel */ + if(u4_width & 0x1) + { + /* Store the value in output buffer in the order U0Y0V0Y1U2Y2V2Y3.... 
*/ + /* Load Y0 */ + u1_y1 = *pu1_y++; + /* Load V0 */ + u1_uv = *pu1_v++; + /* Take Y0 as Y1 */ + u4_pixel = u1_y1; + u4_pixel = (u4_pixel << 8) + u1_uv; + /* Load U0 */ + u1_uv = *pu1_u++; + u4_pixel = (u4_pixel << 8) + u1_y1; + u4_pixel = (u4_pixel << 8) + u1_uv; + *pu4_yuv422i++ = u4_pixel; + } + /* Make the pointers to buffer to point to next row */ + pu1_y = pu1_y + u2_offset1; + if(!u1_flag) + { + /* Restore the pointers of u and v buffer back so that the row of pixels are also */ + /* Processed with same row of u and values again */ + pu1_u = pu1_u - ((u4_width + 1) >> 1); + pu1_v = pu1_v - ((u4_width + 1) >> 1); + } + else + { + /* Adjust the u and v buffer pointers so that they will point to next row */ + pu1_u = pu1_u + u2_offset2; + pu1_v = pu1_v + u2_offset3; + } + + /* Adjust the output buffer pointer for next row */ + pu4_yuv422i = pu4_yuv422i + u2_offset_yuv422i; + /* Toggle the flag to convert between odd and even row */ + u1_flag= u1_flag ^ 0x1; + } +} + + + + +void impeg2_fmt_conv_yuv420p_to_yuv420sp_vu(UWORD8 *pu1_y, UWORD8 *pu1_u, UWORD8 *pu1_v, + UWORD8 *pu1_dest_y, UWORD8 *pu1_dest_uv, + UWORD32 u4_height, UWORD32 u4_width,UWORD32 u4_stridey, + UWORD32 u4_strideu, UWORD32 u4_stridev, + UWORD32 u4_dest_stride_y, UWORD32 u4_dest_stride_uv, + UWORD32 u4_convert_uv_only + ) + +{ + + + UWORD8 *pu1_src,*pu1_dst; + UWORD8 *pu1_src_u, *pu1_src_v; + UWORD16 i; + UWORD32 u2_width_uv; + + UWORD32 u4_dest_inc_y=0, u4_dest_inc_uv=0; + + + /* Copy Y buffer */ + pu1_dst = (UWORD8 *)pu1_dest_y; + pu1_src = (UWORD8 *)pu1_y; + + u4_dest_inc_y = u4_dest_stride_y; + u4_dest_inc_uv = u4_dest_stride_uv; + + if(0 == u4_convert_uv_only) + { + for(i = 0; i < u4_height; i++) + { + memcpy((void *)pu1_dst,(void *)pu1_src, u4_width); + pu1_dst += u4_dest_inc_y; + pu1_src += u4_stridey; + } + } + + /* Interleave Cb and Cr buffers */ + pu1_src_u = pu1_u; + pu1_src_v = pu1_v; + pu1_dst = pu1_dest_uv ; + + u4_height = (u4_height + 1) >> 1; + u2_width_uv = (u4_width + 1) >> 
1; + for(i = 0; i < u4_height ; i++) + { + UWORD32 j; + for(j = 0; j < u2_width_uv; j++) + { + *pu1_dst++ = *pu1_src_v++; + *pu1_dst++ = *pu1_src_u++; + + } + + pu1_dst += u4_dest_inc_uv - u4_width; + pu1_src_u += u4_strideu - u2_width_uv; + pu1_src_v += u4_stridev - u2_width_uv; + } +} + +void impeg2_fmt_conv_yuv420p_to_yuv420sp_uv(UWORD8 *pu1_y, UWORD8 *pu1_u, UWORD8 *pu1_v, + UWORD8 *pu1_dest_y, UWORD8 *pu1_dest_uv, + UWORD32 u4_height, UWORD32 u4_width,UWORD32 u4_stridey, + UWORD32 u4_strideu, UWORD32 u4_stridev, + UWORD32 u4_dest_stride_y, UWORD32 u4_dest_stride_uv, + UWORD32 u4_convert_uv_only) + +{ + + + UWORD8 *pu1_src,*pu1_dst; + UWORD8 *pu1_src_u, *pu1_src_v; + UWORD16 i; + UWORD32 u2_width_uv; + + UWORD32 u4_dest_inc_y=0, u4_dest_inc_uv=0; + + + /* Copy Y buffer */ + pu1_dst = (UWORD8 *)pu1_dest_y; + pu1_src = (UWORD8 *)pu1_y; + + u4_dest_inc_y = u4_dest_stride_y; + u4_dest_inc_uv = u4_dest_stride_uv; + + if(0 == u4_convert_uv_only) + { + for(i = 0; i < u4_height; i++) + { + memcpy((void *)pu1_dst,(void *)pu1_src, u4_width); + pu1_dst += u4_dest_inc_y; + pu1_src += u4_stridey; + } + } + + /* Interleave Cb and Cr buffers */ + pu1_src_u = pu1_u; + pu1_src_v = pu1_v; + pu1_dst = pu1_dest_uv ; + + u4_height = (u4_height + 1) >> 1; + u2_width_uv = (u4_width + 1) >> 1; + for(i = 0; i < u4_height ; i++) + { + UWORD32 j; + for(j = 0; j < u2_width_uv; j++) + { + *pu1_dst++ = *pu1_src_u++; + *pu1_dst++ = *pu1_src_v++; + } + + pu1_dst += u4_dest_inc_uv - u4_width; + pu1_src_u += u4_strideu - u2_width_uv; + pu1_src_v += u4_stridev - u2_width_uv; + } + +} + + diff --git a/common/impeg2_format_conv.h b/common/impeg2_format_conv.h new file mode 100644 index 0000000..52400d3 --- /dev/null +++ b/common/impeg2_format_conv.h @@ -0,0 +1,133 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : impeg2_format_conv.h */ +/* */ +/* Description : Contains coefficients and constant reqquired for */ +/* converting from rgb and gray color spaces to yuv422i */ +/* color space */ +/* */ +/* List of Functions : None */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 08 2007 Naveen Kumar T Draft */ +/* */ +/*****************************************************************************/ + +#ifndef __IMPEG2_FORMAT_CONV_H__ +#define __IMPEG2_FORMAT_CONV_H__ + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +#define COEFF_0_Y 66 +#define COEFF_1_Y 129 +#define COEFF_2_Y 25 +#define COEFF_0_U -38 +#define COEFF_1_U -75 +#define COEFF_2_U 112 +#define COEFF_0_V 112 +#define COEFF_1_V -94 +#define COEFF_2_V -18 +#define CONST_RGB_YUV1 4096 +#define CONST_RGB_YUV2 32768 +#define CONST_GRAY_YUV 128 +#define COEF_2_V2_U 0xFFEE0070 + +#define COF_2Y_0Y 0X00190042 +#define COF_1U_0U 0XFFB5FFDA +#define COF_1V_0V 0XFFA20070 + +/*****************************************************************************/ +/* Enums */ 
+/*****************************************************************************/ +typedef enum { +GRAY_SCALE = 0, +YUV444 = 1, +YUV420 = 2, +YUV422H = 3, +YUV422V = 4, +YUV411 = 5, +RGB24 = 6, +RGB24i = 7 +}input_format_t; + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +typedef void pf_copy_yuv420p_buf_t(UWORD8 *pu1_src_y, + UWORD8 *pu1_src_u, + UWORD8 *pu1_src_v, + UWORD8 *pu1_dst_y, + UWORD8 *pu1_dst_u, + UWORD8 *pu1_dst_v, + UWORD32 u4_width, + UWORD32 u4_height, + UWORD32 u4_src_stride_y, + UWORD32 u4_src_stride_u, + UWORD32 u4_src_stride_v, + UWORD32 u4_dst_stride_y, + UWORD32 u4_dst_stride_u, + UWORD32 u4_dst_stride_v); + +typedef void pf_fmt_conv_yuv420p_to_yuv422ile_t(UWORD8 *pu1_y, + UWORD8 *pu1_u, + UWORD8 *pu1_v, + void *pv_yuv422i, + UWORD32 u4_width, + UWORD32 u4_height, + UWORD32 u4_stride_y, + UWORD32 u4_stride_u, + UWORD32 u4_stride_v, + UWORD32 u4_stride_yuv422i); + +typedef void pf_fmt_conv_yuv420p_to_yuv420sp_t(UWORD8 *pu1_y, + UWORD8 *pu1_u, + UWORD8 *pu1_v, + UWORD8 *pu1_dest_y, + UWORD8 *pu1_dest_uv, + UWORD32 u2_height, + UWORD32 u2_width, + UWORD32 u2_stridey, + UWORD32 u2_strideu, + UWORD32 u2_stridev, + UWORD32 u2_dest_stride_y, + UWORD32 u2_dest_stride_uv, + UWORD32 convert_uv_only); + +pf_copy_yuv420p_buf_t impeg2_copy_frm_yuv420p; +pf_fmt_conv_yuv420p_to_yuv422ile_t impeg2_fmt_conv_yuv420p_to_yuv422ile; +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu; +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv; + +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q; +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q; + +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8; +pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8; + + +#endif /* 
__IMPEG2_FORMAT_CONV_H__ */ diff --git a/common/impeg2_globals.c b/common/impeg2_globals.c new file mode 100644 index 0000000..9193ef7 --- /dev/null +++ b/common/impeg2_globals.c @@ -0,0 +1,351 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#include <stdio.h> +#include "iv_datatypedef.h" +#include "iv.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_globals.h" + +/* Table for converting the quantizer_scale_code to quantizer_scale */ +const UWORD8 gau1_impeg2_non_linear_quant_scale[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8,10,12,14,16,18,20,22, + 24,28,32,36,40,44,48,52, + 56,64,72,80,88,96,104,112 +}; + + +/* Default quantizer matrix to be used for intra blocks */ +const UWORD8 gau1_impeg2_intra_quant_matrix_default[] = +{ + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +}; + +/* Default quantizer matrix to be used for inter blocks */ +const UWORD8 gau1_impeg2_inter_quant_matrix_default[] = +{ + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 +}; + +/* Table to perform inverse scan when the scan direction is zigzag */ +const UWORD8 gau1_impeg2_inv_scan_zig_zag[] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +/* Table to perform inverse scan when the direction of scanning is vertical */ +const UWORD8 gau1_impeg2_inv_scan_vertical[] = +{ + 0, 8, 16, 24, 1, 9, 2, 10, + 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, + 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, + 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63 
+}; + +/*****************************************************************************/ +/* Table that indicate which interpolation type is to used */ +/*****************************************************************************/ +/* Chroma when motion vector is positive */ +const UWORD16 gau2_impeg2_chroma_interp_mv[][16] = +{ + /* Pos X Pos Y */ + { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3 + }, + /* Neg X Pos Y */ + { + 0, 1, 1, 0, + 0, 1, 1, 0, + 2, 3, 3, 2, + 2, 3, 3, 2 + }, + /* Pos X Neg Y */ + { + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3, + 0, 0, 1, 1 + }, + /* Neg X Neg Y */ + { + 0, 1, 1, 0, + 2, 3, 3, 2, + 2, 3, 3, 2, + 0, 1, 1, 0 + } +}; +/*****************************************************************************/ +/* Input #1 Offset in bytes */ +/*****************************************************************************/ +/* Chroma */ +const UWORD16 gau2_impeg2_chroma_interp_inp1[][16] = +{ + /* Pos X Pos Y */ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 + }, + /* Neg X Pos Y */ + { + 0, 0, 0, 4, + 0, 0, 0, 4, + 0, 0, 0, 4, + 0, 0, 0, 4 + }, + /* Pos X Neg Y */ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 72, 72, 72, 72 + }, + /* Neg X Neg Y */ + { + 0, 0, 0, 4, + 0, 0, 0, 4, + 0, 0, 0, 4, + 72, 72, 72, 76 + } +}; +/* Luma */ +const UWORD16 gau2_impeg2_luma_interp_inp1[] = +{ + 1, 1, 3, 3, + 1, 1, 3, 3, + 37, 37, 39, 39, + 37, 37, 39, 39 +}; +/*****************************************************************************/ +/* Input #2 Offset from Input #1 in bytes */ +/*****************************************************************************/ +/* + FXFY 0, + HXFY 2, + FXHY 36, + HXHY 36 +*/ +const UWORD16 gau2_impeg2_luma_interp_inp2[] = +{ + 0, 2, 0, 2, + 36, 36, 36, 36, + 0, 2, 0, 2, + 36, 36, 36, 36 +}; +const UWORD16 gau2_impeg2_chroma_interp_inp2[] = +{ + /* FXFY */ + 0, + /* HXFY */ + 4, + /* FXHY */ + 72, + /* HXHY */ + 72 +}; + +/*****************************************************************************/ 
+/* Corresponds to Table 6-4 frame_rate_value of the standard */ +/*****************************************************************************/ +/* + frame_rate_code frame_rate_value + + 0000 Forbidden + 0001 24 000 ÷ 1001 + 0010 24 + 0011 25 + 0100 30 000 ÷ 1001 + 0101 30 + 0110 50 + 0111 60 000 ÷ 1001 + 1000 60 + 1001 Reserved + .... + 1111 Reserved +*/ +const UWORD16 gau2_impeg2_frm_rate_code[][2] = +{ + {1 , 1}, /* Forbidden */ + {24000, 1001}, + {24000, 1000}, + {25000, 1000}, + {30000, 1001}, + {30000, 1000}, + {50000, 1000}, + {60000, 1001}, + {60000, 1000} + /* Rest reserved */ +}; + +const WORD16 gai2_impeg2_idct_q15[] = +{ + 23170, 23170, 23170, 23170, 23170, 23170, 23170, 23170, + 32138, 27246, 18205, 6393, -6393, -18205, -27246, -32138, + 30274, 12540, -12540, -30274, -30274, -12540, 12540, 30274, + 27246, -6393, -32138, -18205, 18205, 32138, 6393, -27246, + 23170, -23170, -23170, 23170, 23170, -23170, -23170, 23170, + 18205, -32138, 6393, 27246, -27246, -6393, 32138, -18205, + 12540, -30274, 30274, -12540, -12540, 30274, -30274, 12540, + 6393, -18205, 27246, -32138, 32138, -27246, 18205, -6393, +}; + +const WORD16 gai2_impeg2_idct_q11[] = +{ + 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, + 2009, 1703, 1138, 400, -400, -1138, -1703, -2009, + 1892, 784, -784, -1892, -1892, -784, 784, 1892, + 1703, -400, -2009, -1138, 1138, 2009, 400, -1703, + 1448, -1448, -1448, 1448, 1448, -1448, -1448, 1448, + 1138, -2009, 400, 1703, -1703, -400, 2009, -1138, + 784, -1892, 1892, -784, -784, 1892, -1892, 784, + 400, -1138, 1703, -2009, 2009, -1703, 1138, -400, +}; + +const WORD16 gai2_impeg2_idct_even_8_q15[][8] = +{ + { 23170, 23170, 23170, 23170, 23170, 23170, 23170, 23170 }, + { 12540, -30274, 12540, -30274, 12540, -30274, 12540, -30274 }, + { 30274, 12540, 30274, 12540, 30274, 12540, 30274, 12540 }, + { 23170, -23170, 23170, -23170, 23170, -23170, 23170, -23170 } +}; +const WORD16 gai2_impeg2_idct_odd_8_q15[][8] = +{ + { 32138, 27246, 32138, 27246, 32138, 
27246, 32138, 27246 }, + { 18205, 6393, 18205, 6393, 18205, 6393, 18205, 6393 }, + { 27246, -6393, 27246, -6393, 27246, -6393, 27246, -6393 }, + { 32138, 18205, 32138, 18205, 32138, 18205, 32138, 18205 }, + { 18205, -32138, 18205, -32138, 18205, -32138, 18205, -32138 }, + { 6393, 27246, 6393, 27246, 6393, 27246, 6393, 27246 }, + { 6393, -18205, 6393, -18205, 6393, -18205, 6393, -18205 }, + { 27246, -32138, 27246, -32138, 27246, -32138, 27246, -32138 }, +}; + +const WORD16 gai2_impeg2_idct_even_8_q11[][8] = +{ + { 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448 }, + { 784, -1892, 784, -1892, 784, -1892, 784, -1892 }, + { 1892, 784, 1892, 784, 1892, 784, 1892, 784 }, + { 1448, -1448, 1448, -1448, 1448, -1448, 1448, -1448 } +}; +const WORD16 gai2_impeg2_idct_odd_8_q11[][8] = +{ + { 2009, 1703, 2009, 1703, 2009, 1703, 2009, 1703 }, + { 1138, 400, 1138, 400, 1138, 400, 1138, 400 }, + { 1703, -400, 1703, -400, 1703, -400, 1703, -400 }, + { 2009, 1138, 2009, 1138, 2009, 1138, 2009, 1138 }, + { 1138, -2009, 1138, -2009, 1138, -2009, 1138, -2009 }, + { 400, 1703, 400, 1703, 400, 1703, 400, 1703 }, + { 400, -1138, 400, -1138, 400, -1138, 400, -1138 }, + { 1703, -2009, 1703, -2009, 1703, -2009, 1703, -2009 }, +}; + + + +/*****************************************************************************/ +/* Last row IDCT Coefficients in Q11 format */ +/*****************************************************************************/ +const WORD16 gai2_impeg2_idct_last_row_q11[] = +{ + 400, -1138, 1703, -2009, 2009, -1703, 1138, -400, +}; + +const WORD16 gai2_impeg2_idct_first_col_q15[] = +{ + 23170, 32138, 30274, 27246, 23170, 18205, 12540, 6393, +}; + +const WORD16 gai2_impeg2_idct_first_col_q11[] = +{ + 1448, 2009, 1892, 1703, 1448, 1138, 784, 400, +}; + +/*****************************************************************************/ +/* Output of first stage dct (using gai2_impeg2_idct_q15 as coeffs) */ +/* for a 1D data (0, 0, 0, 0, 0, 0, 0, 1) */ 
+/*****************************************************************************/ + +const WORD16 gai2_impeg2_mismatch_stg1_outp[] = +{ + 2, -4, 7, -8, 8, -7, 4, -2 +}; + +const WORD16 gai2_impeg2_mismatch_stg2_additive[] = +{ + 800, -2276, 3406, -4018, 4018, -3406, 2276, -800, + -1600, 4552, -6812, 8036, -8036, 6812, -4552, 1600, + 2800, -7966, 11921, -14063, 14063, -11921, 7966, -2800, + -3200, 9104, -13624, 16072, -16072, 13624, -9104, 3200, + 3200, -9104, 13624, -16072, 16072, -13624, 9104, -3200, + -2800, 7966, -11921, 14063, -14063, 11921, -7966, 2800, + 1600, -4552, 6812, -8036, 8036, -6812, 4552, -1600, + -800, 2276, -3406, 4018, -4018, 3406, -2276, 800, +}; + + +const UWORD8 gau1_impeg2_zerobuf[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; +/*****************************************************************************/ +/* Tables of offset needed to address block in an MB */ +/*****************************************************************************/ +const WORD16 gai2_impeg2_blk_y_off_fld[] = {0,0,1,1}; +const WORD16 gai2_impeg2_blk_y_off_frm[] = {0,0,8,8}; +const WORD16 gai2_impeg2_blk_x_off[] = {0,8,0,8}; diff --git a/common/impeg2_globals.h b/common/impeg2_globals.h new file mode 100755 index 0000000..e8c6865 --- /dev/null +++ b/common/impeg2_globals.h @@ -0,0 +1,57 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2_GLOBALS_H__ +#define __IMPEG2_GLOBALS_H__ + +extern const UWORD8 gau1_impeg2_non_linear_quant_scale[]; +extern const UWORD8 gau1_impeg2_intra_quant_matrix_default[]; +extern const UWORD8 gau1_impeg2_inter_quant_matrix_default[]; +extern const UWORD8 gau1_impeg2_inv_scan_vertical[]; +extern const UWORD8 gau1_impeg2_inv_scan_zig_zag[]; +extern const UWORD16 gau2_impeg2_frm_rate_code[][2]; + +extern const UWORD16 gau2_impeg2_chroma_interp_mv[][16]; +extern const UWORD16 gau2_impeg2_chroma_interp_inp1[][16]; +extern const UWORD16 gau2_impeg2_luma_interp_inp1[]; +extern const UWORD16 gau2_impeg2_luma_interp_inp2[]; +extern const UWORD16 gau2_impeg2_chroma_interp_inp2[]; + +extern const WORD16 gai2_impeg2_idct_q15[]; +extern const WORD16 gai2_impeg2_idct_q11[]; + +extern const WORD16 gai2_impeg2_mismatch_stg1_outp[]; +extern const WORD16 gai2_impeg2_idct_last_row_q11[]; +extern const WORD16 gai2_impeg2_idct_first_col_q15[]; +extern const WORD16 gai2_impeg2_idct_first_col_q11[]; +extern const WORD16 gai2_impeg2_mismatch_stg2_additive[]; + +extern const WORD16 gai2_impeg2_blk_y_off_fld[]; +extern const WORD16 gai2_impeg2_blk_y_off_frm[]; +extern const WORD16 gai2_impeg2_blk_x_off[]; + +extern const UWORD8 gau1_impeg2_zerobuf[]; + +extern const WORD16 gai2_impeg2_idct_odd_8_q15[8][8]; +extern const WORD16 gai2_impeg2_idct_odd_8_q11[8][8]; + +extern const WORD16 
gai2_impeg2_idct_even_8_q11[4][8]; +extern const WORD16 gai2_impeg2_idct_even_8_q15[4][8]; + +#endif /* __IMPEG2_GLOBALS_H__ */ diff --git a/common/impeg2_idct.c b/common/impeg2_idct.c new file mode 100644 index 0000000..6834260 --- /dev/null +++ b/common/impeg2_idct.c @@ -0,0 +1,500 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : impeg2_idct.c */ +/* */ +/* Description : Contains 2d idct and invese quantization functions */ +/* */ +/* List of Functions : impeg2_idct_recon_dc() */ +/* impeg2_idct_recon_dc_mismatch() */ +/* impeg2_idct_recon() */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 10 09 2005 Hairsh M First Version */ +/* */ +/*****************************************************************************/ +/* + IEEE - 1180 results for this IDCT + L 256 256 5 5 300 300 384 384 Thresholds + H 255 255 5 5 300 300 383 383 + sign 1 -1 1 -1 1 -1 1 -1 + Peak Error 1 1 1 1 1 1 1 1 1 + Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06 + Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02 + Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015 + Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015 + */ +#include <stdio.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" + +#include "impeg2_macros.h" +#include "impeg2_globals.h" +#include "impeg2_idct.h" + + +void impeg2_idct_recon_dc(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 i4_src_strd, + WORD32 i4_pred_strd, + WORD32 i4_dst_strd, + WORD32 i4_zero_cols, + WORD32 i4_zero_rows) +{ + WORD32 i4_val, i, j; + + UNUSED(pi2_tmp); + UNUSED(i4_src_strd); + UNUSED(i4_zero_cols); + UNUSED(i4_zero_rows); + + i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; + i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); + i4_val = i4_val * gai2_impeg2_idct_q11[0]; + i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); + + for(i = 0; i < TRANS_SIZE_8; i++) + { 
+ for(j = 0; j < TRANS_SIZE_8; j++) + { + pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]); + } + pu1_dst += i4_dst_strd; + pu1_pred += i4_pred_strd; + } +} +void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 i4_src_strd, + WORD32 i4_pred_strd, + WORD32 i4_dst_strd, + WORD32 i4_zero_cols, + WORD32 i4_zero_rows) + +{ + WORD32 i4_val, i, j; + WORD32 i4_count = 0; + WORD32 i4_sum; + + UNUSED(pi2_tmp); + UNUSED(i4_src_strd); + UNUSED(i4_zero_cols); + UNUSED(i4_zero_rows); + + i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; + i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); + + i4_val *= gai2_impeg2_idct_q11[0]; + for(i = 0; i < TRANS_SIZE_8; i++) + { + for (j = 0; j < TRANS_SIZE_8; j++) + { + i4_sum = i4_val; + i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count]; + i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); + i4_sum += pu1_pred[j]; + pu1_dst[j] = CLIP_U8(i4_sum); + i4_count++; + } + + pu1_dst += i4_dst_strd; + pu1_pred += i4_pred_strd; + } + +} +/** + ******************************************************************************* + * + * @brief + * This function performs Inverse transform and reconstruction for 8x8 + * input block + * + * @par Description: + * Performs inverse transform and adds the prediction data and clips output + * to 8 bit + * + * @param[in] pi2_src + * Input 8x8 coefficients + * + * @param[in] pi2_tmp + * Temporary 8x8 buffer for storing inverse + * + * transform + * 1st stage output + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[out] pu1_dst + * Output 8x8 block + * + * @param[in] src_strd + * Input stride + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] dst_strd + * Output Stride + * + * @param[in] shift + * Output shift + * + * @param[in] zero_cols + * Zero columns in pi2_src + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + 
+void impeg2_idct_recon(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 i4_src_strd, + WORD32 i4_pred_strd, + WORD32 i4_dst_strd, + WORD32 i4_zero_cols, + WORD32 i4_zero_rows) +{ + WORD32 j, k; + WORD32 ai4_e[4], ai4_o[4]; + WORD32 ai4_ee[2], ai4_eo[2]; + WORD32 i4_add; + WORD32 i4_shift; + WORD16 *pi2_tmp_orig; + WORD32 i4_trans_size; + WORD32 i4_zero_rows_2nd_stage = i4_zero_cols; + WORD32 i4_row_limit_2nd_stage; + + i4_trans_size = TRANS_SIZE_8; + + pi2_tmp_orig = pi2_tmp; + + if((i4_zero_cols & 0xF0) == 0xF0) + i4_row_limit_2nd_stage = 4; + else + i4_row_limit_2nd_stage = TRANS_SIZE_8; + + + if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + + /* Inverse Transform 1st stage */ + i4_shift = IDCT_STG1_SHIFT; + i4_add = 1 << (i4_shift - 1); + + for(j = 0; j < i4_row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((i4_zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] + + gai2_impeg2_idct_q15[3 * 8 + k] + * pi2_src[3 * i4_src_strd]; + } + ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]; + ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]; + ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]; + ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] 
- ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + pi2_tmp[k] = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pi2_tmp[k + 4] = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + } + } + pi2_src++; + pi2_tmp += i4_trans_size; + i4_zero_cols = i4_zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + i4_shift = IDCT_STG2_SHIFT; + i4_add = 1 << (i4_shift - 1); + if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < i4_trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] + + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; + } + ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; + ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; + ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; + ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] - ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); + itrans_out = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); + } + pi2_tmp++; + pu1_pred += i4_pred_strd; + pu1_dst += i4_dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < i4_trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of 
multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] + + gai2_impeg2_idct_q11[3 * 8 + k] + * pi2_tmp[3 * i4_trans_size] + + gai2_impeg2_idct_q11[5 * 8 + k] + * pi2_tmp[5 * i4_trans_size] + + gai2_impeg2_idct_q11[7 * 8 + k] + * pi2_tmp[7 * i4_trans_size]; + } + + ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] + + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; + ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] + + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; + ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] + + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; + ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] + + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] - ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); + itrans_out = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); + } + pi2_tmp++; + pu1_pred += i4_pred_strd; + pu1_dst += i4_dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + } + else /* All rows of input are non-zero */ + { + /************************************************************************************************/ + /**********************************START 
- IT_RECON_8x8******************************************/ + /************************************************************************************************/ + + /* Inverse Transform 1st stage */ + i4_shift = IDCT_STG1_SHIFT; + i4_add = 1 << (i4_shift - 1); + + for(j = 0; j < i4_row_limit_2nd_stage; j++) + { + /* Checking for Zero Cols */ + if((i4_zero_cols & 1) == 1) + { + memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); + } + else + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] + + gai2_impeg2_idct_q15[3 * 8 + k] + * pi2_src[3 * i4_src_strd] + + gai2_impeg2_idct_q15[5 * 8 + k] + * pi2_src[5 * i4_src_strd] + + gai2_impeg2_idct_q15[7 * 8 + k] + * pi2_src[7 * i4_src_strd]; + } + + ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd] + + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd]; + ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd] + + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd]; + ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0] + + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd]; + ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0] + + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] - ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + pi2_tmp[k] = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pi2_tmp[k + 4] = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + } + } + pi2_src++; + pi2_tmp += i4_trans_size; + i4_zero_cols = i4_zero_cols >> 1; + } + + pi2_tmp = pi2_tmp_orig; + + /* Inverse Transform 2nd stage */ + i4_shift = IDCT_STG2_SHIFT; + i4_add = 1 << 
(i4_shift - 1); + if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ + { + for(j = 0; j < i4_trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] + + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; + } + ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; + ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; + ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; + ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] - ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); + itrans_out = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); + } + pi2_tmp++; + pu1_pred += i4_pred_strd; + pu1_dst += i4_dst_strd; + } + } + else /* All rows of output of 1st stage are non-zero */ + { + for(j = 0; j < i4_trans_size; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + for(k = 0; k < 4; k++) + { + ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] + + gai2_impeg2_idct_q11[3 * 8 + k] + * pi2_tmp[3 * i4_trans_size] + + gai2_impeg2_idct_q11[5 * 8 + k] + * pi2_tmp[5 * i4_trans_size] + + gai2_impeg2_idct_q11[7 * 8 + k] + * pi2_tmp[7 * i4_trans_size]; + } + + ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] + + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; + ai4_eo[1] 
= gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] + + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; + ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] + + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; + ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] + + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; + + /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ + ai4_e[0] = ai4_ee[0] + ai4_eo[0]; + ai4_e[3] = ai4_ee[0] - ai4_eo[0]; + ai4_e[1] = ai4_ee[1] + ai4_eo[1]; + ai4_e[2] = ai4_ee[1] - ai4_eo[1]; + for(k = 0; k < 4; k++) + { + WORD32 itrans_out; + itrans_out = + CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); + pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); + itrans_out = + CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); + pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); + } + pi2_tmp++; + pu1_pred += i4_pred_strd; + pu1_dst += i4_dst_strd; + } + } + /************************************************************************************************/ + /************************************END - IT_RECON_8x8******************************************/ + /************************************************************************************************/ + } +} + diff --git a/common/impeg2_idct.h b/common/impeg2_idct.h new file mode 100644 index 0000000..80defde --- /dev/null +++ b/common/impeg2_idct.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2_IDCT_H__ +#define __IMPEG2_IDCT_H__ + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + +typedef void pf_idct_recon_t(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows); + +/* ARM assembly modules curently ignore non_zero_cols argument */ +pf_idct_recon_t impeg2_idct_recon_dc; + +pf_idct_recon_t impeg2_idct_recon_dc_mismatch; + +pf_idct_recon_t impeg2_idct_recon; + + +pf_idct_recon_t impeg2_idct_recon_dc_a9q; + +pf_idct_recon_t impeg2_idct_recon_dc_mismatch_a9q; + +pf_idct_recon_t impeg2_idct_recon_a9q; + + +pf_idct_recon_t impeg2_idct_recon_dc_av8; + +pf_idct_recon_t impeg2_idct_recon_dc_mismatch_av8; + +pf_idct_recon_t impeg2_idct_recon_av8; + +pf_idct_recon_t impeg2_idct_recon_sse42; + +pf_idct_recon_t impeg2_idct_recon_dc_mismatch_sse42; + +pf_idct_recon_t impeg2_idct_recon_dc_sse42; + +#endif /* #ifndef __IMPEG2_IDCT_H__ */ + diff --git a/common/impeg2_inter_pred.c b/common/impeg2_inter_pred.c new file mode 100644 index 0000000..019fa5c --- /dev/null +++ b/common/impeg2_inter_pred.c @@ -0,0 +1,467 @@ +/****************************************************************************** + * + * Copyright 
(C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_mcu.c +* +* @brief +* Contains MC function definitions for MPEG2 decoder +* +* @author +* Harish +* +* @par List of Functions: +* - impeg2_copy_mb() +* - impeg2_interpolate() +* - impeg2_mc_halfx_halfy_8x8() +* - impeg2_mc_halfx_fully_8x8() +* - impeg2_mc_fullx_halfy_8x8() +* - impeg2_mc_fullx_fully_8x8() +* +* @remarks +* None +* +******************************************************************************* +*/ + +#include <stdio.h> +#include <string.h> +#include "iv_datatypedef.h" +#include "iv.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" + +#include "impeg2_inter_pred.h" +#include "impeg2_globals.h" +#include "impeg2_macros.h" +#include "impeg2_idct.h" + +/******************************************************************************* +* Function Name : impeg2_copy_mb +* +* Description : copies 3 components to the frame from mc_buf +* +* Arguments : +* src_buf : Source Buffer +* dst_buf : Destination Buffer +* src_offset_x : X offset for source +* src_offset_y : Y offset for source +* dst_offset_x : X offset for 
destination +* dst_offset_y : Y offset for destination +* src_wd : Source Width +* dst_wd : destination Width +* rows : Number of rows +* cols : Number of columns +* +* Values Returned : None +*******************************************************************************/ +void impeg2_copy_mb(yuv_buf_t *ps_src_buf, + yuv_buf_t *ps_dst_buf, + UWORD32 u4_src_wd, + UWORD32 u4_dst_wd) +{ + UWORD8 *pu1_src; + UWORD8 *pu1_dst; + UWORD32 i; + UWORD32 u4_rows = MB_SIZE; + UWORD32 u4_cols = MB_SIZE; + + /*******************************************************/ + /* copy Y */ + /*******************************************************/ + pu1_src = ps_src_buf->pu1_y; + pu1_dst = ps_dst_buf->pu1_y; + for(i = 0; i < u4_rows; i++) + { + memcpy(pu1_dst, pu1_src, u4_cols); + pu1_src += u4_src_wd; + pu1_dst += u4_dst_wd; + } + + u4_src_wd >>= 1; + u4_dst_wd >>= 1; + u4_rows >>= 1; + u4_cols >>= 1; + + /*******************************************************/ + /* copy U */ + /*******************************************************/ + pu1_src = ps_src_buf->pu1_u; + pu1_dst = ps_dst_buf->pu1_u; + for(i = 0; i < u4_rows; i++) + { + memcpy(pu1_dst, pu1_src, u4_cols); + + pu1_src += u4_src_wd; + pu1_dst += u4_dst_wd; + } + /*******************************************************/ + /* copy V */ + /*******************************************************/ + pu1_src = ps_src_buf->pu1_v; + pu1_dst = ps_dst_buf->pu1_v; + for(i = 0; i < u4_rows; i++) + { + memcpy(pu1_dst, pu1_src, u4_cols); + + pu1_src += u4_src_wd; + pu1_dst += u4_dst_wd; + } + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_interpolate */ +/* */ +/* Description : averages the contents of buf_src1 and buf_src2 and stores*/ +/* result in buf_dst */ +/* */ +/* Inputs : buf_src1 - First Source */ +/* buf_src2 - Second Source */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Avg the values from two sources and store the result in */ +/* destination 
buffer */ +/* */ +/* Outputs : buf_dst - Avg of contents of buf_src1 and buf_src2 */ +/* */ +/* Returns : None */ +/* */ +/* Issues : Assumes that all 3 buffers are of same size */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* 15 09 2010 Venkat Added stride */ +/* */ +/*****************************************************************************/ +void impeg2_interpolate(yuv_buf_t *ps_buf_src1, + yuv_buf_t *ps_buf_src2, + yuv_buf_t *ps_buf_dst, + UWORD32 u4_stride) +{ + + UWORD32 i,j; + UWORD8 *pu1_src1,*pu1_src2,*pu1_dst; + pu1_src1 = ps_buf_src1->pu1_y; + pu1_src2 = ps_buf_src2->pu1_y; + pu1_dst = ps_buf_dst->pu1_y; + for(i = MB_SIZE; i > 0; i--) + { + for(j = MB_SIZE; j > 0; j--) + { + *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1; + } + + pu1_dst += u4_stride - MB_SIZE; + + } + + u4_stride >>= 1; + + pu1_src1 = ps_buf_src1->pu1_u; + pu1_src2 = ps_buf_src2->pu1_u; + pu1_dst = ps_buf_dst->pu1_u; + for(i = MB_CHROMA_SIZE; i > 0 ; i--) + { + for(j = MB_CHROMA_SIZE; j > 0; j--) + { + *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1; + } + + pu1_dst += u4_stride - MB_CHROMA_SIZE; + } + + pu1_src1 = ps_buf_src1->pu1_v; + pu1_src2 = ps_buf_src2->pu1_v; + pu1_dst = ps_buf_dst->pu1_v; + for(i = MB_CHROMA_SIZE; i > 0 ; i--) + { + for(j = MB_CHROMA_SIZE; j > 0; j--) + { + *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1; + } + + pu1_dst += u4_stride - MB_CHROMA_SIZE; + } + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_halfx_halfy_8x8() */ +/* */ +/* Description : Gets the buffer from (0.5,0.5) to (8.5,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. 
*/ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0),(1,0),(0,1),(1,1) position in */ +/* the ref frame.Interpolate these four values to get the */ +/* value at(0.5,0.5).Repeat this to get an 8 x 8 block */ +/* using 9 x 9 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2_mc_halfx_halfy_8x8(UWORD8 *pu1_out, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD32 u4_out_wid) +{ + UWORD8 *pu1_ref_p0,*pu1_ref_p1,*pu1_ref_p2,*pu1_ref_p3; + UWORD32 i,j; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 P1 + Q + P2 P3 + */ + + pu1_ref_p0 = pu1_ref; + pu1_ref_p1 = pu1_ref + 1; + pu1_ref_p2 = pu1_ref + u4_ref_wid; + pu1_ref_p3 = pu1_ref + u4_ref_wid + 1; + + for(i = 0; i < BLK_SIZE; i++) + { + for(j = 0; j < BLK_SIZE; j++) + { + *pu1_out++ = (( (*pu1_ref_p0++ ) + + (*pu1_ref_p1++ ) + + (*pu1_ref_p2++ ) + + (*pu1_ref_p3++ ) + 2 ) >> 2); + } + pu1_ref_p0 += u4_ref_wid - BLK_SIZE; + pu1_ref_p1 += u4_ref_wid - BLK_SIZE; + pu1_ref_p2 += u4_ref_wid - BLK_SIZE; + pu1_ref_p3 += u4_ref_wid - BLK_SIZE; + + pu1_out += u4_out_wid - BLK_SIZE; + } + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_halfx_fully_8x8() */ +/* */ +/* Description : Gets the buffer from (0.5,0) to (8.5,8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will 
be */ +/* block will be extracted. */ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (1,0) position in the ref frame */ +/* Interpolate these two values to get the value at(0.5,0) */ +/* Repeat this to get an 8 x 8 block using 9 x 8 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2_mc_halfx_fully_8x8(UWORD8 *pu1_out, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD32 u4_out_wid) +{ + UWORD8 *pu1_ref_p0, *pu1_ref_p1; + UWORD32 i,j; + + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 Q P1 + */ + + pu1_ref_p0 = pu1_ref; + pu1_ref_p1 = pu1_ref + 1; + + for(i = 0; i < BLK_SIZE; i++) + { + for(j = 0; j < BLK_SIZE; j++) + { + *pu1_out++ = ((( *pu1_ref_p0++ ) + + (*pu1_ref_p1++) + 1 ) >> 1); + } + pu1_ref_p0 += u4_ref_wid - BLK_SIZE; + pu1_ref_p1 += u4_ref_wid - BLK_SIZE; + + pu1_out += u4_out_wid - BLK_SIZE; + } + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_fullx_halfy_8x8() */ +/* */ +/* Description : Gets the buffer from (0,0.5) to (8,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. 
*/ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (0,1) position in the ref frame */ +/* Interpolate these two values to get the value at(0,0.5) */ +/* Repeat this to get an 8 x 8 block using 8 x 9 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2_mc_fullx_halfy_8x8(UWORD8 *pu1_out, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD32 u4_out_wid) +{ + + UWORD8 *pu1_ref_p0, *pu1_ref_p1; + UWORD32 i,j; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 + x + P1 + */ + pu1_ref_p0 = pu1_ref; + pu1_ref_p1 = pu1_ref + u4_ref_wid; + + for(i = 0; i < BLK_SIZE; i++) + { + for(j = 0; j < BLK_SIZE; j++) + { + *pu1_out++ = ((( *pu1_ref_p0++) + + (*pu1_ref_p1++) + 1 ) >> 1); + } + pu1_ref_p0 += u4_ref_wid - BLK_SIZE; + pu1_ref_p1 += u4_ref_wid - BLK_SIZE; + + pu1_out += u4_out_wid - BLK_SIZE; + } + + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_fullx_fully_8x8() */ +/* */ +/* Description : Gets the buffer from (x,y) to (x+8,y+8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. 
*/ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) position in the ref frame */ +/* Get an 8 x 8 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2_mc_fullx_fully_8x8(UWORD8 *pu1_out, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD32 u4_out_wid) +{ + + UWORD32 i; + + for(i = 0; i < BLK_SIZE; i++) + { + memcpy(pu1_out, pu1_ref, BLK_SIZE); + pu1_ref += u4_ref_wid; + pu1_out += u4_out_wid; + } + return; +} diff --git a/common/impeg2_inter_pred.h b/common/impeg2_inter_pred.h new file mode 100644 index 0000000..be3b0e5 --- /dev/null +++ b/common/impeg2_inter_pred.h @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef __IMPEG2_INTER_PRED_H__ +#define __IMPEG2_INTER_PRED_H__ + + +typedef struct +{ + UWORD8 *pu1_y; + UWORD8 *pu1_u; + UWORD8 *pu1_v; +}yuv_buf_t; + +typedef struct +{ + WORD16 *pi2_y; + WORD16 *pi2_u; + WORD16 *pi2_v; +}yuv_buf16_t; + +/** + * Picture buffer + */ +typedef struct +{ + UWORD8 *pu1_y; + UWORD8 *pu1_u; + UWORD8 *pu1_v; + + /** Used to store display Timestamp for current buffer */ + WORD32 u4_ts; + UWORD8 u1_used_as_ref; + + /** + * buffer ID from buffer manager + */ + WORD32 i4_buf_id; + +}pic_buf_t; + +typedef void pf_copy_mb_t (yuv_buf_t *src_buf, + yuv_buf_t *dst_buf, + UWORD32 src_wd, + UWORD32 dst_wd); + +typedef void pf_interpred_t(UWORD8 *out,UWORD8 *ref, UWORD32 ref_wid, UWORD32 out_wid); + +typedef void pf_interpolate_t(yuv_buf_t *buf_src1, + yuv_buf_t *buf_src2, + yuv_buf_t *buf_dst, + UWORD32 stride); + +pf_interpolate_t impeg2_interpolate; +pf_interpolate_t impeg2_interpolate_a9q; +pf_interpolate_t impeg2_interpolate_av8; + +pf_copy_mb_t impeg2_copy_mb; +pf_copy_mb_t impeg2_copy_mb_a9q; +pf_copy_mb_t impeg2_copy_mb_av8; + +pf_interpred_t impeg2_mc_halfx_halfy_8x8; +pf_interpred_t impeg2_mc_halfx_fully_8x8; +pf_interpred_t impeg2_mc_fullx_halfy_8x8; +pf_interpred_t impeg2_mc_fullx_fully_8x8; + +pf_interpred_t impeg2_mc_halfx_halfy_8x8_a9q; +pf_interpred_t impeg2_mc_halfx_fully_8x8_a9q; +pf_interpred_t impeg2_mc_fullx_halfy_8x8_a9q; +pf_interpred_t impeg2_mc_fullx_fully_8x8_a9q; + +/* AV8 Declarations */ +pf_interpred_t impeg2_mc_halfx_halfy_8x8_av8; +pf_interpred_t impeg2_mc_halfx_fully_8x8_av8; +pf_interpred_t impeg2_mc_fullx_halfy_8x8_av8; +pf_interpred_t impeg2_mc_fullx_fully_8x8_av8; + + +/* SSE4.2 Declarations*/ +pf_copy_mb_t impeg2_copy_mb_sse42; +pf_interpolate_t impeg2_interpolate_sse42; +pf_interpred_t impeg2_mc_halfx_halfy_8x8_sse42; +pf_interpred_t impeg2_mc_halfx_fully_8x8_sse42; +pf_interpred_t impeg2_mc_fullx_halfy_8x8_sse42; +pf_interpred_t impeg2_mc_fullx_fully_8x8_sse42; + +#endif /* #ifndef 
__IMPEG2_INTER_PRED_H__ */ diff --git a/common/impeg2_job_queue.c b/common/impeg2_job_queue.c new file mode 100644 index 0000000..d36ce7c --- /dev/null +++ b/common/impeg2_job_queue.c @@ -0,0 +1,530 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_job_queue.c +* +* @brief +* Contains functions for job queue +* +* @author +* Harish +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "ithread.h" +#include "impeg2_macros.h" +#include "impeg2_job_queue.h" + +/** +******************************************************************************* +* +* @brief Returns size for job queue context. 
Does not include job queue buffer +* requirements +* +* @par Description +* Returns size for job queue context. Does not include job queue buffer +* requirements. Buffer size required to store the jobs should be allocated in +* addition to the value returned here. +* +* @returns Size of the job queue context +* +* @remarks +* +******************************************************************************* +*/ +WORD32 impeg2_jobq_ctxt_size() +{ + WORD32 i4_size; + i4_size = sizeof(jobq_t); + i4_size += ithread_get_mutex_lock_size(); + return i4_size; +} + +/** +******************************************************************************* +* +* @brief +* Locks the jobq conext +* +* @par Description +* Locks the jobq conext by calling ithread_mutex_lock() +* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if mutex lock fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_lock(jobq_t *ps_jobq) +{ + WORD32 i4_ret_val; + i4_ret_val = ithread_mutex_lock(ps_jobq->pv_mutex); + if(i4_ret_val) + { + return IV_FAIL; + } + return IV_SUCCESS; +} + +/** +******************************************************************************* +* +* @brief +* Unlocks the jobq conext +* +* @par Description +* Unlocks the jobq conext by calling ithread_mutex_unlock() +* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if mutex unlock fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ + +IV_API_CALL_STATUS_T impeg2_jobq_unlock(jobq_t *ps_jobq) +{ + WORD32 i4_ret_val; + i4_ret_val = ithread_mutex_unlock(ps_jobq->pv_mutex); + if(i4_ret_val) + { + return IV_FAIL; + } + return IV_SUCCESS; + +} +/** +******************************************************************************* +* +* @brief +* Yeilds the thread +* +* @par Description +* Unlocks the jobq conext by calling +* 
impeg2_jobq_unlock(), ithread_yield() and then impeg2_jobq_lock() +* jobq is unlocked before to ensure the jobq can be accessed by other threads +* If unlock is not done before calling yield then no other thread can access +* the jobq functions and update jobq. +* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if mutex lock unlock or yield fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_yield(jobq_t *ps_jobq) +{ + + IV_API_CALL_STATUS_T e_ret = IV_SUCCESS; + + IV_API_CALL_STATUS_T e_ret_tmp; + e_ret_tmp = impeg2_jobq_unlock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + + //NOP(1024 * 8); + ithread_yield(); + + e_ret_tmp = impeg2_jobq_lock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + return e_ret; +} + + +/** +******************************************************************************* +* +* @brief free the job queue pointers +* +* @par Description +* Frees the jobq context +* +* @param[in] pv_buf +* Memoy for job queue buffer and job queue context +* +* @returns Pointer to job queue context +* +* @remarks +* Since it will be called only once by master thread this is not thread safe. 
+* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_free(jobq_t *ps_jobq) +{ + WORD32 i4_ret; + i4_ret = ithread_mutex_destroy(ps_jobq->pv_mutex); + + if(0 == i4_ret) + return IV_SUCCESS; + else + return IV_FAIL; +} + +/** +******************************************************************************* +* +* @brief Initialize the job queue +* +* @par Description +* Initializes the jobq context and sets write and read pointers to start of +* job queue buffer +* +* @param[in] pv_buf +* Memoy for job queue buffer and job queue context +* +* @param[in] buf_size +* Size of the total memory allocated +* +* @returns Pointer to job queue context +* +* @remarks +* Since it will be called only once by master thread this is not thread safe. +* +******************************************************************************* +*/ +void* impeg2_jobq_init(void *pv_buf, WORD32 i4_buf_size) +{ + jobq_t *ps_jobq; + UWORD8 *pu1_buf; + pu1_buf = (UWORD8 *)pv_buf; + + ps_jobq = (jobq_t *)pu1_buf; + pu1_buf += sizeof(jobq_t); + i4_buf_size -= sizeof(jobq_t); + + ps_jobq->pv_mutex = pu1_buf; + pu1_buf += ithread_get_mutex_lock_size(); + i4_buf_size -= ithread_get_mutex_lock_size(); + + if(i4_buf_size <= 0) + return NULL; + + ithread_mutex_init(ps_jobq->pv_mutex); + + ps_jobq->pv_buf_base = pu1_buf; + ps_jobq->pv_buf_wr = pu1_buf; + ps_jobq->pv_buf_rd = pu1_buf; + ps_jobq->pv_buf_end = pu1_buf + i4_buf_size; + ps_jobq->i4_terminate = 0; + + + return ps_jobq; +} +/** +******************************************************************************* +* +* @brief +* Resets the jobq conext +* +* @par Description +* Resets the jobq conext by initilizing job queue context elements +* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if lock unlock fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T 
impeg2_jobq_reset(jobq_t *ps_jobq) +{ + IV_API_CALL_STATUS_T e_ret = IV_SUCCESS; + e_ret = impeg2_jobq_lock(ps_jobq); + RETURN_IF((e_ret != IV_SUCCESS), e_ret); + + ps_jobq->pv_buf_wr = ps_jobq->pv_buf_base; + ps_jobq->pv_buf_rd = ps_jobq->pv_buf_base; + ps_jobq->i4_terminate = 0; + e_ret = impeg2_jobq_unlock(ps_jobq); + RETURN_IF((e_ret != IV_SUCCESS), e_ret); + + return e_ret; +} + +/** +******************************************************************************* +* +* @brief +* Deinitializes the jobq conext +* +* @par Description +* Deinitializes the jobq conext by calling impeg2_jobq_reset() +* and then destrying the mutex created +* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if lock unlock fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_deinit(jobq_t *ps_jobq) +{ + WORD32 i4_ret_val; + IV_API_CALL_STATUS_T e_ret = IV_SUCCESS; + + e_ret = impeg2_jobq_reset(ps_jobq); + RETURN_IF((e_ret != IV_SUCCESS), e_ret); + + i4_ret_val = ithread_mutex_destroy(ps_jobq->pv_mutex); + if(i4_ret_val) + { + return IV_FAIL; + } + + return IV_SUCCESS; +} + + +/** +******************************************************************************* +* +* @brief +* Terminates the jobq +* +* @par Description +* Terminates the jobq by setting a flag in context. 
+* +* @param[in] ps_jobq +* Job Queue context +* +* @returns IMPEG2D_FAIL if lock unlock fails else IV_SUCCESS +* +* @remarks +* +******************************************************************************* +*/ + +IV_API_CALL_STATUS_T impeg2_jobq_terminate(jobq_t *ps_jobq) +{ + IV_API_CALL_STATUS_T e_ret = IV_SUCCESS; + e_ret = impeg2_jobq_lock(ps_jobq); + RETURN_IF((e_ret != IV_SUCCESS), e_ret); + + ps_jobq->i4_terminate = 1; + + e_ret = impeg2_jobq_unlock(ps_jobq); + RETURN_IF((e_ret != IV_SUCCESS), e_ret); + return e_ret; +} + + +/** +******************************************************************************* +* +* @brief Adds a job to the queue +* +* @par Description +* Adds a job to the queue and updates wr address to next location. +* Format/content of the job structure is abstracted and hence size of the job +* buffer is being passed. +* +* @param[in] ps_jobq +* Job Queue context +* +* @param[in] pv_job +* Pointer to the location that contains details of the job to be added +* +* @param[in] job_size +* Size of the job buffer +* +* @param[in] blocking +* To signal if the write is blocking or non-blocking. 
+* +* @returns +* +* @remarks +* Job Queue buffer is assumed to be allocated to handle worst case number of jobs +* Wrap around is not supported +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_queue(jobq_t *ps_jobq, + void *pv_job, + WORD32 i4_job_size, + WORD32 i4_blocking, + WORD32 i4_lock) +{ + IV_API_CALL_STATUS_T e_ret = IV_SUCCESS; + IV_API_CALL_STATUS_T e_ret_tmp; + UWORD8 *pu1_buf; + UNUSED(i4_blocking); + + if(i4_lock) + { + e_ret_tmp = impeg2_jobq_lock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + } + pu1_buf = (UWORD8 *)ps_jobq->pv_buf_wr; + if((UWORD8 *)ps_jobq->pv_buf_end >= (pu1_buf + i4_job_size)) + { + memcpy(ps_jobq->pv_buf_wr, pv_job, i4_job_size); + ps_jobq->pv_buf_wr = (UWORD8 *)ps_jobq->pv_buf_wr + i4_job_size; + e_ret = IV_SUCCESS; + } + else + { + /* Handle wrap around case */ + /* Wait for pv_buf_rd to consume first job_size number of bytes + * from the beginning of job queue + */ + e_ret = IV_FAIL; + } + + ps_jobq->i4_terminate = 0; + + if(i4_lock) + { + e_ret_tmp = impeg2_jobq_unlock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + } + + return e_ret; +} +/** +******************************************************************************* +* +* @brief Gets next from the Job queue +* +* @par Description +* Gets next job from the job queue and updates rd address to next location. +* Format/content of the job structure is abstracted and hence size of the job +* buffer is being passed. If it is a blocking call and if there is no new job +* then this functions unlocks the mutext and calls yield and then locks it back. 
+* and continues till a job is available or terminate is set +* +* @param[in] ps_jobq +* Job Queue context +* +* @param[out] pv_job +* Pointer to the location that contains details of the job to be written +* +* @param[in] job_size +* Size of the job buffer +* +* @param[in] blocking +* To signal if the read is blocking or non-blocking. +* +* @returns +* +* @remarks +* Job Queue buffer is assumed to be allocated to handle worst case number of jobs +* Wrap around is not supported +* +******************************************************************************* +*/ +IV_API_CALL_STATUS_T impeg2_jobq_dequeue(jobq_t *ps_jobq, + void *pv_job, + WORD32 i4_job_size, + WORD32 i4_blocking, + WORD32 i4_lock) +{ + IV_API_CALL_STATUS_T e_ret; + IV_API_CALL_STATUS_T e_ret_tmp; + volatile UWORD8 *pu1_buf; + if(i4_lock) + { + e_ret_tmp = impeg2_jobq_lock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + } + pu1_buf = (UWORD8 *)ps_jobq->pv_buf_rd; + + + if((UWORD8 *)ps_jobq->pv_buf_end >= (pu1_buf + i4_job_size)) + { + while(1) + { + pu1_buf = (UWORD8 *)ps_jobq->pv_buf_rd; + if((UWORD8 *)ps_jobq->pv_buf_wr >= (pu1_buf + i4_job_size)) + { + memcpy(pv_job, ps_jobq->pv_buf_rd, i4_job_size); + ps_jobq->pv_buf_rd = (UWORD8 *)ps_jobq->pv_buf_rd + i4_job_size; + e_ret = IV_SUCCESS; + break; + } + else + { + /* If all the entries have been dequeued, then break and return */ + if(1 == ps_jobq->i4_terminate) + { + e_ret = IV_FAIL; + break; + } + + if((1 == i4_blocking) && (1 == i4_lock)) + { + impeg2_jobq_yield(ps_jobq); + + } + else + { + /* If there is no job available, + * and this is non blocking call then return fail */ + e_ret = IV_FAIL; + } + } + } + } + else + { + /* Handle wrap around case */ + /* Wait for pv_buf_rd to consume first i4_job_size number of bytes + * from the beginning of job queue + */ + e_ret = IV_FAIL; + } + if(i4_lock) + { + e_ret_tmp = impeg2_jobq_unlock(ps_jobq); + RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp); + } + + return e_ret; +} diff --git 
a/common/impeg2_job_queue.h b/common/impeg2_job_queue.h new file mode 100644 index 0000000..46d8bb9 --- /dev/null +++ b/common/impeg2_job_queue.h @@ -0,0 +1,72 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2_job_queue.h +* +* @brief +* Contains functions for job queue +* +* @author +* Harish +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IMPEG2_JOB_QUEUE_H_ +#define _IMPEG2_JOB_QUEUE_H_ + +typedef struct +{ + /** Pointer to buffer base which contains the jobs */ + void *pv_buf_base; + + /** Pointer to current address where new job can be added */ + void *pv_buf_wr; + + /** Pointer to current address from where next job can be obtained */ + void *pv_buf_rd; + + /** Pointer to end of job buffer */ + void *pv_buf_end; + + /** Mutex used to keep the functions thread-safe */ + void *pv_mutex; + + /** Flag to indicate jobq has to be terminated */ + WORD32 i4_terminate; +}jobq_t; + +WORD32 impeg2_jobq_ctxt_size(void); +void* impeg2_jobq_init(void *pv_buf, WORD32 buf_size); +IV_API_CALL_STATUS_T impeg2_jobq_free(jobq_t *ps_jobq); +IV_API_CALL_STATUS_T impeg2_jobq_reset(jobq_t *ps_jobq); +IV_API_CALL_STATUS_T impeg2_jobq_deinit(jobq_t *ps_jobq); +IV_API_CALL_STATUS_T impeg2_jobq_terminate(jobq_t *ps_jobq); +IV_API_CALL_STATUS_T impeg2_jobq_queue(jobq_t *ps_jobq, void *pv_job, WORD32 job_size, WORD32 blocking, WORD32 lock); +IV_API_CALL_STATUS_T impeg2_jobq_dequeue(jobq_t *ps_jobq, void *pv_job, WORD32 job_size, WORD32 blocking, WORD32 lock); + +#endif /* _IMPEG2_JOB_QUEUE_H_ */ diff --git a/common/impeg2_macros.h b/common/impeg2_macros.h new file mode 100644 index 0000000..366510f --- /dev/null +++ b/common/impeg2_macros.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_MACROS_H__
+#define __IMPEG2_MACROS_H__
+
+/*
+ * General purpose helper macros.
+ * NOTE: these are function-like macros, so an argument may be evaluated more
+ * than once. Do not pass expressions with side effects (e.g. i++).
+ */
+
+#define ABS(x) ((x) < 0 ? (-1 * (x)) : (x))
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+/* Clamps the lvalue Number in place to [Min, Max]; note the (Max, Min)      */
+/* argument order. Wrapped in do/while(0) so that it is a single statement   */
+/* and can be used safely as the body of an if/else.                         */
+#define CLIP(Number,Max,Min) do { if((Number) > (Max)) (Number) = (Max); \
+else if((Number) < (Min)) (Number) = (Min); } while(0)
+
+/* Evaluates to -1 for negative values and to 1 otherwise (including 0) */
+#define SIGN(Number) (((Number) < 0) ? -1 : 1)
+
+
+/* Bits msb..lsb (inclusive) of val, cast to UWORD16 */
+#define BITS(val,msb,lsb) (UWORD16)((((val) >> (lsb)) & ((1 << ((msb) - (lsb) + 1)) - 1)))
+
+/* Single bit of val, cast to UWORD16 */
+#define BIT(val,bit) (UWORD16)(((val) >> (bit)) & 0x1)
+
+/* Non-zero when lowerLimit <= val <= upperLimit; note the argument order */
+#define IS_VAL_IN_RANGE(val,upperLimit,lowerLimit) ((val) >= (lowerLimit) && (val) <= (upperLimit))
+
+/* Upper / lower 16 bits of dword. The argument is parenthesized so that     */
+/* expressions such as MSW(a | b) are shifted/masked as a whole.             */
+#define MSW(dword) ((dword) >> 16)
+#define LSW(dword) ((dword) & 0xFFFF)
+/* Adds 1 to strictly positive values, then shifts right by one */
+#define DIV_2_RND(mv) (((mv) + ((mv) > 0)) >> 1)
+#define IS_NEG(Number) (((Number) < 0) ? 1 : 0)
+
+/* Round x up to the next multiple of 128 / 64 / 32 / 16 / 8 */
+#define ALIGN128(x) ((((x) + 127) >> 7) << 7)
+#define ALIGN64(x) ((((x) + 63) >> 6) << 6)
+#define ALIGN32(x) ((((x) + 31) >> 5) << 5)
+#define ALIGN16(x) ((((x) + 15) >> 4) << 4)
+#define ALIGN8(x) ((((x) + 7) >> 3) << 3)
+
+
+#define RETURN_IF(cond, retval) if(cond) {return (retval);}
+#define UNUSED(x) ((void)(x))
+
+
+/* Expands to assert(); this header does not include <assert.h> itself, so   */
+/* the including file has to provide it.                                     */
+#define ASSERT(x) assert(x)
+
+
+#endif /* __IMPEG2_MACROS_H__ */
diff --git a/common/impeg2_mem_func.c b/common/impeg2_mem_func.c
new file mode 100644
index 0000000..9268c01
--- /dev/null
+++ b/common/impeg2_mem_func.c
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_mem_func.c
+*
+* @brief
+*  Contains the C memset helpers used on 8x8 blocks by the MPEG2 codec
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+* - impeg2_memset0_16bit_8x8_linear_block()
+* - impeg2_memset_8bit_8x8_block()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+
+/*******************************************************************************
+* Function Name   : impeg2_memset0_16bit_8x8_linear_block
+*
+* Description     : Zeroes a contiguous buffer of 64 16-bit values
+*                   (the residual buffer)
+*
+* Arguments       : pi2_buf - destination buffer, 64 WORD16 entries
+*
+* Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset0_16bit_8x8_linear_block (WORD16 *pi2_buf)
+{
+    memset(pi2_buf, 0, sizeof(WORD16) * 64);
+}
+
+
+
+/*******************************************************************************
+* Function Name   : impeg2_memset_8bit_8x8_block
+*
+* Description     : Fills BLK_SIZE rows of BLK_SIZE bytes each with one value
+*
+* Arguments       : pu1_dst   - first byte of the destination block
+*                   u4_dc_val - fill value, handed to memset() as is
+*                   u4_dst_wd - distance in bytes between two destination rows
+*
+* Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset_8bit_8x8_block(UWORD8 *pu1_dst, WORD32 u4_dc_val, WORD32 u4_dst_wd)
+{
+    UWORD8 *pu1_row = pu1_dst;
+    WORD32 i4_row;
+
+    /* One memset per row; rows are u4_dst_wd bytes apart */
+    for(i4_row = 0; i4_row < BLK_SIZE; i4_row++)
+    {
+        memset(pu1_row, u4_dc_val, BLK_SIZE);
+        pu1_row += u4_dst_wd;
+    }
+}
+
+
+
diff --git a/common/impeg2_mem_func.h b/common/impeg2_mem_func.h
new file mode 100644
index 0000000..f73702c
--- /dev/null
+++ b/common/impeg2_mem_func.h
@@ -0,0 +1,41 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in
compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#ifndef IMPEG2_MEM_FUNC_H_ +#define IMPEG2_MEM_FUNC_H_ + +typedef void pf_memset0_one_16bit_buf_t (WORD16 *buf); +typedef void pf_memset_8bit_t (UWORD8 *dst, WORD32 dc_val, WORD32 dst_wd); + +pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block; +pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_a9q; + +pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_sse42; + +pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_av8; + +pf_memset_8bit_t impeg2_memset_8bit_8x8_block; +pf_memset_8bit_t impeg2_memset_8bit_8x8_block_a9q; + +pf_memset_8bit_t impeg2_memset_8bit_8x8_block_sse42; + +pf_memset_8bit_t impeg2_memset_8bit_8x8_block_av8; + +#endif /* IMPEG2_MEM_FUNC_H_ */ diff --git a/common/ithread.c b/common/ithread.c new file mode 100644 index 0000000..76fdad3 --- /dev/null +++ b/common/ithread.c @@ -0,0 +1,453 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*
+ * File Name         : ithread.c
+ *
+ * Description       : Contains abstraction for threads, mutex and semaphores
+ *
+ * List of Functions :
+ *
+ * Issues / Problems : None
+ *
+ * Revision History  :
+ *
+ *        DD MM YYYY   Author(s)       Changes
+ *        07 09 2012   Harish          Initial Version
+ */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#ifndef X86_MSVC
+//#define PTHREAD_AFFINITY
+//#define SYSCALL_AFFINITY
+
+/* Feature-test macros only take effect when they are defined before the    */
+/* first system header is included, so they are set ahead of all includes.  */
+#ifdef PTHREAD_AFFINITY
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#ifndef __USE_GNU
+#define __USE_GNU
+#endif
+#endif
+#endif
+
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "ithread.h"
+#include <sys/types.h>
+
+#ifndef X86_MSVC
+
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <unistd.h>
+
+
+#endif
+/* Needed only by the syscall based affinity code */
+#ifdef SYSCALL_AFFINITY
+#include <sys/syscall.h>
+#endif
+
+#ifdef X86_MSVC
+
+#include <windows.h>
+#define SEM_MAX_COUNT 100
+#define SEM_INCREMENT_COUNT 1
+
+/* Size in bytes of the storage the caller must provide for a thread handle */
+UWORD32 ithread_get_handle_size(void)
+{
+    return (sizeof(HANDLE));
+}
+
+/* Size in bytes of the storage the caller must provide for a mutex */
+UWORD32 ithread_get_mutex_lock_size(void)
+{
+    return (sizeof(HANDLE));
+}
+
+/* Starts a thread running strt(argument) and stores its HANDLE in          */
+/* *thread_handle. attribute is not used. Returns 0 on success, -1 when     */
+/* thread_handle is NULL or the thread could not be created.                */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    HANDLE *ppv_thread_handle;
+    HANDLE thread_handle_value;
+
+    ((void)(attribute));
+
+    if(0 == thread_handle)
+        return -1;
+
+    ppv_thread_handle = (HANDLE *)thread_handle;
+    thread_handle_value = CreateThread
+                    (NULL,                          /* Attributes      */
+                     1024*128,                      /* Stack size      */
+                     (LPTHREAD_START_ROUTINE)strt,  /* Thread function */
+                     argument,                      /* Parameters      */
+                     0,                             /* Creation flags  */
+                     NULL);                         /* Thread ID       */
+    *ppv_thread_handle = thread_handle_value;
+
+    /* CreateThread() returns NULL on failure; do not report that as success */
+    if(NULL == thread_handle_value)
+        return -1;
+
+    return 0;
+}
+
+/* Waits for the thread stored in *thread_handle to finish and closes its   */
+/* handle. val_ptr is not used. Returns 0 on success, -1 when thread_handle */
+/* is NULL or the wait did not succeed.                                     */
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    HANDLE *ppv_thread_handle;
+    HANDLE thread_handle_value;
+
+    ((void)(val_ptr));
+
+    if(0 == thread_handle)
+        return -1;
+
+    ppv_thread_handle = (HANDLE *)thread_handle;
+    thread_handle_value = *ppv_thread_handle;
+
+    if(WAIT_OBJECT_0 != WaitForSingleObject(thread_handle_value, INFINITE))
+        return -1;
+
+    CloseHandle(thread_handle_value);
+
+    return 0;
+}
+
+void ithread_exit(void *thread_handle)
+{
+    HANDLE *ppv_thread_handle;
+    HANDLE thread_handle_value;
+    DWORD thread_exit_code;
+
+    if(0 == thread_handle)
+        return;
+
+    ppv_thread_handle = (HANDLE *)thread_handle;
+    thread_handle_value = *ppv_thread_handle;
+    /* Get exit code for thread.
If the return value is 0, means thread is busy */
+    if( 0 != GetExitCodeThread(thread_handle_value, &thread_exit_code))
+    {
+        TerminateThread(thread_handle_value, thread_exit_code);
+    }
+
+    return;
+}
+
+/* Size in bytes of the storage the caller must provide for a mutex */
+WORD32 ithread_get_mutex_struct_size(void)
+{
+    return (sizeof(HANDLE));
+}
+
+/* The mutex is implemented as a semaphore with initial and maximum count 1. */
+/* Returns 0 on success, -1 when mutex is NULL or the object cannot be       */
+/* created.                                                                  */
+WORD32 ithread_mutex_init(void *mutex)
+{
+    HANDLE *ppv_mutex_handle;
+    HANDLE mutex_handle_value;
+
+    if(0 == mutex)
+        return -1;
+
+    ppv_mutex_handle = (HANDLE *)mutex;
+    mutex_handle_value = CreateSemaphore(NULL, 1, 1, NULL);
+    *ppv_mutex_handle = mutex_handle_value;
+
+    /* CreateSemaphore() returns NULL on failure */
+    if(NULL == mutex_handle_value)
+        return -1;
+
+    return 0;
+}
+
+/* Closes the handle stored in *mutex. Returns 0 on success, -1 otherwise */
+WORD32 ithread_mutex_destroy(void *mutex)
+{
+    HANDLE *ppv_mutex_handle;
+    HANDLE mutex_handle_value;
+
+    if(0 == mutex)
+        return -1;
+
+    ppv_mutex_handle = (HANDLE *)mutex;
+    mutex_handle_value = *ppv_mutex_handle;
+
+    if(FALSE == CloseHandle(mutex_handle_value))
+        return -1;
+
+    return 0;
+}
+
+/* Blocks until the mutex is acquired. Returns 0 on success, -1 for a NULL */
+/* argument and 1 when the wait did not succeed.                            */
+WORD32 ithread_mutex_lock(void *mutex)
+{
+    HANDLE *ppv_mutex_handle;
+    HANDLE mutex_handle_value;
+    DWORD result = 0;
+
+    if(0 == mutex)
+        return -1;
+
+    ppv_mutex_handle = (HANDLE *)mutex;
+    mutex_handle_value = *ppv_mutex_handle;
+    result = WaitForSingleObject(mutex_handle_value, INFINITE);
+
+    if(WAIT_OBJECT_0 == result)
+        return 0;
+
+    return 1;
+
+}
+
+/* Releases the mutex. Returns 0 on success, -1 otherwise */
+WORD32 ithread_mutex_unlock(void *mutex)
+{
+    HANDLE *ppv_mutex_handle;
+    HANDLE mutex_handle_value;
+    DWORD result = 0;
+
+    if(0 == mutex)
+        return -1;
+
+    ppv_mutex_handle = (HANDLE *)mutex;
+    mutex_handle_value = *ppv_mutex_handle;
+    result = ReleaseSemaphore(mutex_handle_value, 1, NULL);
+
+    if(0 == result)
+        return -1;
+
+    return 0;
+}
+
+/* No-op in this build */
+void ithread_yield(void) { }
+
+/* Sleep() has millisecond resolution: the request is rounded down, so any  */
+/* value below 1000 us becomes Sleep(0).                                    */
+void ithread_usleep(UWORD32 u4_time_us)
+{
+    UWORD32 u4_time_ms = u4_time_us / 1000;
+    Sleep(u4_time_ms);
+}
+
+void ithread_msleep(UWORD32 u4_time_ms)
+{
+    Sleep(u4_time_ms);
+}
+
+/* u4_time is in seconds */
+void ithread_sleep(UWORD32 u4_time)
+{
+    UWORD32 u4_time_ms = u4_time * 1000;
+    Sleep(u4_time_ms);
+}
+
+/* Size in bytes of the storage the caller must provide for a semaphore */
+UWORD32 ithread_get_sem_struct_size(void)
+{
+    return (sizeof(HANDLE));
+}
+
+/* Creates a semaphore with initial count value and a maximum count of      */
+/* SEM_MAX_COUNT. pshared is not used. Returns 0 on success, -1 when sem is */
+/* NULL or the semaphore cannot be created.                                 */
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    HANDLE sem_handle_value;
+
+    ((void)(pshared));
+
+    if(0 == sem)
+        return -1;
+
+    sem_handle_value = CreateSemaphore(NULL,         /* Security Attribute*/
+                                       value,        /* Initial count     */
+                                       SEM_MAX_COUNT,/* Max value         */
+                                       NULL);        /* Name, not used    */
+    *sem_handle = sem_handle_value;
+
+    /* CreateSemaphore() returns NULL on failure */
+    if(NULL == sem_handle_value)
+        return -1;
+
+    return 0;
+}
+
+/* Increments the semaphore count by SEM_INCREMENT_COUNT */
+WORD32 ithread_sem_post(void *sem)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    HANDLE sem_handle_value;
+
+    if(0 == sem)
+        return -1;
+
+    sem_handle_value = *sem_handle;
+
+    /* Post on Semaphore by releasing the lock on mutex */
+    if(ReleaseSemaphore(sem_handle_value, SEM_INCREMENT_COUNT, NULL))
+        return 0;
+
+    return -1;
+}
+
+/* Blocks until the semaphore is acquired. Returns 0 only when the wait     */
+/* actually succeeded and -1 in every other case.                           */
+WORD32 ithread_sem_wait(void *sem)
+{
+    DWORD result = 0;
+    HANDLE *sem_handle = (HANDLE *)sem;
+    HANDLE sem_handle_value;
+
+    if(0 == sem)
+        return -1;
+
+    sem_handle_value = *sem_handle;
+
+    /* Wait on Semaphore object infinitely */
+    result = WaitForSingleObject(sem_handle_value, INFINITE);
+
+    /* If lock on semaphore is acquired, return SUCCESS */
+    if(WAIT_OBJECT_0 == result)
+        return 0;
+
+    /* WAIT_TIMEOUT, WAIT_ABANDONED or WAIT_FAILED: the semaphore was not */
+    /* acquired, so the caller must not be told that it was               */
+    return -1;
+}
+
+/* Closes the handle stored in *sem. Returns 0 on success, -1 otherwise */
+WORD32 ithread_sem_destroy(void *sem)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    HANDLE sem_handle_value;
+
+    if(0 == sem)
+        return -1;
+
+    sem_handle_value = *sem_handle;
+
+    if(FALSE == CloseHandle(sem_handle_value) )
+    {
+        return -1;
+    }
+    return 0;
+}
+
+/* Affinity is not implemented in this build; core_id is ignored and 1 is  */
+/* returned.                                                                */
+WORD32 ithread_set_affinity(WORD32 core_id)
+{
+    ((void)(core_id));
+    return 1;
+}
+
+#else
+/* Size in bytes of the storage the caller must provide for a thread handle */
+UWORD32 ithread_get_handle_size(void)
+{
+    return sizeof(pthread_t);
+}
+
+/* Size in bytes of the storage the caller must provide for a mutex */
+UWORD32 ithread_get_mutex_lock_size(void)
+{
+    return sizeof(pthread_mutex_t);
+}
+
+
+/* Starts a thread running strt(argument) with default attributes;          */
+/* attribute is not used. Returns the pthread_create() result.              */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    ((void)(attribute));
+    return pthread_create((pthread_t *)thread_handle, NULL,(void *(*)(void *)) strt, argument);
+}
+
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    pthread_t *pthread_handle = (pthread_t *)thread_handle;
+
((void)(val_ptr));
+    return pthread_join(*pthread_handle, NULL);
+}
+
+/* Terminates the calling thread; val_ptr is handed to pthread_exit() */
+void ithread_exit(void *val_ptr)
+{
+    /* pthread_exit() returns void, so it must not be used as a return value */
+    pthread_exit(val_ptr);
+}
+
+/* Size in bytes of the storage the caller must provide for a mutex */
+WORD32 ithread_get_mutex_struct_size(void)
+{
+    return(sizeof(pthread_mutex_t));
+}
+WORD32 ithread_mutex_init(void *mutex)
+{
+    return pthread_mutex_init((pthread_mutex_t *) mutex, NULL);
+}
+
+WORD32 ithread_mutex_destroy(void *mutex)
+{
+    return pthread_mutex_destroy((pthread_mutex_t *) mutex);
+}
+
+WORD32 ithread_mutex_lock(void *mutex)
+{
+    return pthread_mutex_lock((pthread_mutex_t *)mutex);
+}
+
+WORD32 ithread_mutex_unlock(void *mutex)
+{
+    return pthread_mutex_unlock((pthread_mutex_t *)mutex);
+}
+
+void ithread_yield(void)
+{
+    sched_yield();
+}
+
+/* u4_time is in seconds. sleep() is used because u4_time * 1000 * 1000     */
+/* overflows 32 bits for larger values and usleep() is only required to     */
+/* accept intervals below one second.                                       */
+void ithread_sleep(UWORD32 u4_time)
+{
+    sleep(u4_time);
+}
+
+/* Whole seconds go to sleep(), the remainder (< 1 s) to usleep() */
+void ithread_msleep(UWORD32 u4_time_ms)
+{
+    if(u4_time_ms >= 1000)
+        sleep(u4_time_ms / 1000);
+
+    usleep((u4_time_ms % 1000) * 1000);
+}
+
+/* Whole seconds go to sleep(), the remainder (< 1 s) to usleep() */
+void ithread_usleep(UWORD32 u4_time_us)
+{
+    if(u4_time_us >= 1000000)
+        sleep(u4_time_us / 1000000);
+
+    usleep(u4_time_us % 1000000);
+}
+
+/* Size in bytes of the storage the caller must provide for a semaphore */
+UWORD32 ithread_get_sem_struct_size(void)
+{
+    return(sizeof(sem_t));
+}
+
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{
+    return sem_init((sem_t *)sem,pshared,value);
+}
+
+WORD32 ithread_sem_post(void *sem)
+{
+    return sem_post((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_wait(void *sem)
+{
+    return sem_wait((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_destroy(void *sem)
+{
+    return sem_destroy((sem_t *)sem);
+}
+
+
+/* Pins the calling thread to core_id when one of the affinity backends is  */
+/* compiled in. Returns -1 on error. When neither PTHREAD_AFFINITY nor      */
+/* SYSCALL_AFFINITY is defined, core_id is ignored and 1 is returned.       */
+WORD32 ithread_set_affinity(WORD32 core_id)
+{
+#ifdef PTHREAD_AFFINITY
+    cpu_set_t cpuset;
+    int num_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    pthread_t cur_thread = pthread_self();
+
+    /* Reject negative ids as well; CPU_SET() must not get a negative index */
+    if ((core_id < 0) || (core_id >= num_cores))
+        return -1;
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(core_id, &cpuset);
+
+    return pthread_setaffinity_np(cur_thread, sizeof(cpu_set_t), &cpuset);
+
+/* defined() keeps this valid when SYSCALL_AFFINITY is defined without a value */
+#elif defined(SYSCALL_AFFINITY)
+    WORD32 i4_sys_res;
+    UWORD32 i4_mask;
+
+    pid_t pid = gettid();
+
+    /* NOTE(review): i4_mask was used here without a declaration; a one bit */
+    /* CPU mask for core_id is assumed. This path also needs                */
+    /* <sys/syscall.h> to be included - confirm before enabling it.         */
+    if ((core_id < 0) || (core_id >= 32))
+        return -1;
+
+    i4_mask = (UWORD32)1 << core_id;
+
+    i4_sys_res = syscall(__NR_sched_setaffinity, pid, sizeof(i4_mask), &i4_mask);
+    if (i4_sys_res)
+    {
+        //WORD32 err;
+        //err = errno;
+        //perror("Error in setaffinity syscall PERROR : ");
+        //LOG_ERROR("Error in the syscall setaffinity: mask=0x%x err=0x%x", i4_mask, i4_sys_res);
+        return -1;
+    }
+#endif
+    ((void)(core_id));
+    return 1;
+
+}
+#endif
diff --git a/common/ithread.h b/common/ithread.h
new file mode 100644
index 0000000..eb75d20
--- /dev/null
+++ b/common/ithread.h
@@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ithread.h
+*
+* @brief
+*  This file contains the function declarations that make up the Application
+*  Program Interface (API) of the Thread Abstraction Layer. All objects
+*  (threads, mutexes, semaphores) are passed as untyped pointers to storage
+*  owned by the caller; the ithread_get_*_size() functions report how much
+*  storage each object needs.
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef __ITHREAD_H__
+#define __ITHREAD_H__
+
+/* Size in bytes of a thread handle (pthread_t, or HANDLE under X86_MSVC) */
+UWORD32 ithread_get_handle_size(void);
+
+/* Size in bytes of a mutex object (pthread_mutex_t, or HANDLE under X86_MSVC) */
+UWORD32 ithread_get_mutex_lock_size(void);
+
+/* Starts a thread that runs strt(argument) and stores its handle in         */
+/* thread_handle. attribute is ignored by both implementations.              */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument);
+
+/* pthread build: ends the calling thread via pthread_exit(val_ptr).         */
+/* NOTE(review): the X86_MSVC build instead treats the argument as a pointer */
+/* to a thread HANDLE and terminates that thread - confirm intended usage.   */
+void ithread_exit(void *val_ptr);
+
+/* Waits for the thread whose handle is stored at thread_id to finish.       */
+/* val_ptr is ignored by both implementations.                               */
+WORD32 ithread_join(void *thread_id, void ** val_ptr);
+
+/* Size in bytes of a mutex object; same value as                            */
+/* ithread_get_mutex_lock_size() but returned as WORD32                      */
+WORD32 ithread_get_mutex_struct_size(void);
+
+/* Mutex operations on caller provided storage */
+WORD32 ithread_mutex_init(void *mutex);
+
+WORD32 ithread_mutex_destroy(void *mutex);
+
+WORD32 ithread_mutex_lock(void *mutex);
+
+WORD32 ithread_mutex_unlock(void *mutex);
+
+/* pthread build: sched_yield(); X86_MSVC build: no-op */
+void ithread_yield(void);
+
+/* Suspends the calling thread; u4_time is in seconds */
+void ithread_sleep(UWORD32 u4_time);
+
+/* Suspends the calling thread; u4_time_ms is in milliseconds */
+void ithread_msleep(UWORD32 u4_time_ms);
+
+/* Suspends the calling thread; u4_time_us is in microseconds (the X86_MSVC  */
+/* build rounds down to whole milliseconds)                                  */
+void ithread_usleep(UWORD32 u4_time_us);
+
+/* Size in bytes of a semaphore object (sem_t, or HANDLE under X86_MSVC) */
+UWORD32 ithread_get_sem_struct_size(void);
+
+/* Initializes a semaphore with the given count. pshared is forwarded to     */
+/* sem_init() in the pthread build and ignored in the X86_MSVC build.        */
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value);
+
+WORD32 ithread_sem_post(void *sem);
+
+WORD32 ithread_sem_wait(void *sem);
+
+WORD32 ithread_sem_destroy(void *sem);
+
+/* Requests that the calling thread runs on core core_id. Returns 1 when no  */
+/* affinity backend is compiled in (the default, and always for X86_MSVC).   */
+WORD32 ithread_set_affinity(WORD32 core_id);
+#endif /* __ITHREAD_H__ */
diff --git a/common/iv.h b/common/iv.h
new file mode 100644
index 0000000..3941497
--- /dev/null
+++ b/common/iv.h
@@ -0,0 +1,420 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* iv.h +* +* @brief +* This file contains all the necessary structure and enumeration +* definitions needed for the Application Program Interface(API) of the +* Ittiam Video and Image codecs +* +* @author +* 100239(RCY) +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + + +#ifndef _IV_H +#define _IV_H + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + + +/* IV_API_CALL_STATUS_T:This is only to return the FAIL/PASS status to the */ +/* application for the current API call */ + +typedef enum{ + IV_STATUS_NA = 0x7FFFFFFF, + IV_SUCCESS = 0x0, + IV_FAIL = 0x1, +}IV_API_CALL_STATUS_T; + +/* IV_MEM_TYPE_T: This Enumeration defines the type of memory (Internal/Ext */ +/* -ernal) along with the cacheable/non-cacheable 
attributes */ + +typedef enum { + IV_NA_MEM_TYPE = 0x7FFFFFFF, + IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x1, + IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x2, + IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x3, + IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x4, + IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x5, + IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x6, + IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x7, + IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x8 +}IV_MEM_TYPE_T; + +/* IV_COLOR_FORMAT_T: This enumeration lists all the color formats which */ +/* finds usage in video/image codecs */ + +typedef enum { + IV_CHROMA_NA = 0x7FFFFFFF, + IV_YUV_420P = 0x1, + IV_YUV_422P = 0x2, + IV_420_UV_INTL = 0x3, + IV_YUV_422IBE = 0x4, + IV_YUV_422ILE = 0x5, + IV_YUV_444P = 0x6, + IV_YUV_411P = 0x7, + IV_GRAY = 0x8, + IV_RGB_565 = 0x9, + IV_RGB_24 = 0xa, + IV_YUV_420SP_UV = 0xb, + IV_YUV_420SP_VU = 0xc, + IV_RGBA_8888 = 0xd +}IV_COLOR_FORMAT_T; + +/* IV_PICTURE_CODING_TYPE_T: VOP/Frame coding type Enumeration */ + +typedef enum { + IV_NA_FRAME = 0x7FFFFFFF, + IV_I_FRAME = 0x0, + IV_P_FRAME = 0x1, + IV_B_FRAME = 0x2, + IV_IDR_FRAME = 0x3, + IV_II_FRAME = 0x4, + IV_IP_FRAME = 0x5, + IV_IB_FRAME = 0x6, + IV_PI_FRAME = 0x7, + IV_PP_FRAME = 0x8, + IV_PB_FRAME = 0x9, + IV_BI_FRAME = 0xa, + IV_BP_FRAME = 0xb, + IV_BB_FRAME = 0xc, + IV_MBAFF_I_FRAME = 0xd, + IV_MBAFF_P_FRAME = 0xe, + IV_MBAFF_B_FRAME = 0xf, + IV_MBAFF_IDR_FRAME = 0x10, + IV_NOT_CODED_FRAME = 0x11, + IV_FRAMETYPE_DEFAULT = IV_I_FRAME +}IV_PICTURE_CODING_TYPE_T; + +/* IV_FLD_TYPE_T: field type Enumeration */ + +typedef enum { + IV_NA_FLD = 0x7FFFFFFF, + IV_TOP_FLD = 0x0, + IV_BOT_FLD = 0x1, + IV_FLD_TYPE_DEFAULT = IV_TOP_FLD +}IV_FLD_TYPE_T; + +/* IV_CONTENT_TYPE_T: Video content type */ + +typedef enum { + IV_CONTENTTYPE_NA = 0x7FFFFFFF, + IV_PROGRESSIVE = 0x0, + IV_INTERLACED = 0x1, + IV_PROGRESSIVE_FRAME = 0x2, + IV_INTERLACED_FRAME = 0x3, + IV_INTERLACED_TOPFIELD = 0x4, + IV_INTERLACED_BOTTOMFIELD = 0x5, + IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE, 
+}IV_CONTENT_TYPE_T; + +/* IV_API_COMMAND_TYPE_T:API command type */ +typedef enum { + IV_CMD_NA = 0x7FFFFFFF, + IV_CMD_GET_NUM_MEM_REC = 0x0, + IV_CMD_FILL_NUM_MEM_REC = 0x1, + IV_CMD_RETRIEVE_MEMREC = 0x2, + IV_CMD_INIT = 0x3, + IV_CMD_DUMMY_ELEMENT = 0x4, +}IV_API_COMMAND_TYPE_T; + +/*****************************************************************************/ +/* Structure */ +/*****************************************************************************/ + +/* IV_OBJ_T: This structure defines the handle for the codec instance */ + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to the API function pointer table of the codec + */ + void *pv_fxns; + + /** + * Pointer to the handle of the codec + */ + void *pv_codec_handle; +}iv_obj_t; + +/* iv_mem_rec_t: This structure defines the memory record holder which will */ +/* be used by the codec to communicate its memory requirements to the */ +/* application through appropriate API functions */ + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to the memory allocated by the application + */ + void *pv_base; + + /** + * u4_size of the memory to be allocated + */ + UWORD32 u4_mem_size; + + /** + * Alignment of the memory pointer + */ + UWORD32 u4_mem_alignment; + /** + * Nature of the memory to be allocated + */ + IV_MEM_TYPE_T e_mem_type; +}iv_mem_rec_t; + +/* IV_YUV_BUF_T: This structure defines attributes for the yuv buffer */ + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * Pointer to Luma (Y) Buffer + */ + + void *pv_y_buf; + /** + * Pointer to Chroma (Cb) Buffer + */ + void *pv_u_buf; + + /** + * Pointer to Chroma (Cr) Buffer + */ + void *pv_v_buf; + + /** + * Width of the Luma (Y) Buffer + */ + UWORD32 u4_y_wd; + + /** + * Height of the Luma (Y) Buffer + */ + UWORD32 u4_y_ht; + + /** + * Stride/Pitch of the Luma (Y) Buffer + */ + UWORD32 u4_y_strd; + + /** + * Width of the 
Chroma (Cb) Buffer + */ + UWORD32 u4_u_wd; + + /** + * Height of the Chroma (Cb) Buffer + */ + UWORD32 u4_u_ht; + + /** + * Stride/Pitch of the Chroma (Cb) Buffer + */ + UWORD32 u4_u_strd; + + /** + * Width of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_wd; + + /** + * Height of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_ht; + + /** + * Stride/Pitch of the Chroma (Cr) Buffer + */ + UWORD32 u4_v_strd; +}iv_yuv_buf_t; + +/*****************************************************************************/ +/* Get Number of Memory Records */ +/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_GET_NUM_MEM_REC */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; +}iv_num_mem_rec_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; + + /** + * num_mem_rec + */ + UWORD32 u4_num_mem_rec; +}iv_num_mem_rec_op_t; + + +/*****************************************************************************/ +/* Fill Memory Records */ +/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_FILL_NUM_MEM_REC */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** + * pointer to array of memrecords structures should be filled by codec + with details of memory resource requirements + */ + iv_mem_rec_t *pv_mem_rec_location; + + /** + * maximum width for which codec should request memory requirements + */ + UWORD32 u4_max_frm_wd; + + /** + * maximum height for which codec should request memory requirements + */ + UWORD32 u4_max_frm_ht; +}iv_fill_mem_rec_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; + + /** + * no of 
memory record structures which are filled by codec + */ + UWORD32 u4_num_mem_rec_filled; +}iv_fill_mem_rec_op_t; + + +/*****************************************************************************/ +/* Retrieve Memory Records */ +/*****************************************************************************/ + +/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_RETRIEVE_MEMREC */ + + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IV_API_COMMAND_TYPE_T e_cmd; + + /** + * array of structures where codec should fill with all resources(memory) with it + */ + iv_mem_rec_t *pv_mem_rec_location; +}iv_retrieve_mem_rec_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; + + /** + * no of memory records filled by codec + */ + UWORD32 u4_num_mem_rec_filled; +}iv_retrieve_mem_rec_op_t; + + + +#endif /* _IV_H */ + diff --git a/common/iv_datatypedef.h b/common/iv_datatypedef.h new file mode 100644 index 0000000..3c45942 --- /dev/null +++ b/common/iv_datatypedef.h @@ -0,0 +1,81 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*
+ * File Name         : iv_datatypedef.h
+ *
+ * Description       : This file contains all the necessary data type
+ *                     definitions.
+ *
+ * List of Functions : None
+ *
+ * Issues / Problems : None
+ *
+ * Revision History  :
+ *
+ *        DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *        29 12 2006   Rajendra C Y    Draft
+ */
+/*****************************************************************************/
+
+#ifndef __IV_DATATYPEDEF_H__
+#define __IV_DATATYPEDEF_H__
+
+/*****************************************************************************/
+/* Typedefs                                                                  */
+/*****************************************************************************/
+
+/* The names carry the intended width in bits; the underlying types are the */
+/* plain C types, so the real widths depend on the target ABI.              */
+typedef int WORD32;
+typedef unsigned int UWORD32;
+
+typedef short WORD16;
+typedef unsigned short UWORD16;
+
+/* NOTE(review): plain char has implementation-defined signedness, so WORD8 */
+/* is not guaranteed to hold negative values on every target - confirm      */
+/* whether "signed char" is intended here.                                  */
+typedef char WORD8;
+typedef unsigned char UWORD8;
+
+typedef char CHAR;
+#ifndef NULL
+#define NULL ((void *)0)
+
+#endif
+
+/* Boolean type: IT_FALSE is 0, IT_TRUE is 1 */
+typedef enum
+{
+    IT_FALSE,
+    IT_TRUE
+} IT_BOOL;
+
+
+/* Generic status type: IT_OK is 0, IT_ERROR is -1 */
+typedef enum
+{
+    IT_OK,
+    IT_ERROR = -1
+} IT_STATUS;
+
+/*****************************************************************************/
+/* Input and Output Parameter identifiers                                    */
+/*****************************************************************************/
+/* Both expand to nothing; they only annotate parameter direction */
+#define IT_IN
+#define IT_OUT
+
+
+#endif /* __IV_DATATYPEDEF_H__ */
+
diff --git a/common/mips/impeg2_platform_macros.h b/common/mips/impeg2_platform_macros.h
new file mode 100644
index 0000000..05ff6da
--- /dev/null
+++ b/common/mips/impeg2_platform_macros.h
@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_PLATFORM_MACROS_H__
+#define __IMPEG2_PLATFORM_MACROS_H__
+
+
+/* Stores the byte-swapped value of u4_temp1 in u4_temp2. The expansion     */
+/* already ends with a semicolon. Arguments are parenthesized so that       */
+/* expressions can be passed safely.                                        */
+#define CONV_LE_TO_BE(u4_temp2,u4_temp1) (u4_temp2) = ((u4_temp1) << 24) | \
+ (((u4_temp1) & 0xff00) << 8) | \
+ (((u4_temp1) & 0xff0000) >> 8) | \
+ ((u4_temp1) >> 24);
+/* Number of leading zero bits in u4_word; 32 for 0, for which the result  */
+/* of __builtin_clz() is undefined                                         */
+static __inline UWORD32 CLZ(UWORD32 u4_word)
+{
+    if(u4_word)
+        return (__builtin_clz(u4_word));
+    else
+        return 32;
+}
+
+
+/* Saturate x to the unsigned (U) or signed (S) range of the given width.  */
+/* Each expansion is wrapped in parentheses so that it can be used inside  */
+/* a larger expression; x is evaluated more than once.                     */
+#define CLIP_U8(x) (((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S8(x) (((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)))
+
+#define CLIP_U12(x) (((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S12(x) (((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)))
+
+#define CLIP_U16(x) (((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)))
+/* Signed 16 bit range is [-32768, 32767] */
+#define CLIP_S16(x) (((x) > 32767) ? (32767) : (((x) < -32768) ? (-32768) : (x)))
+/* Prefetch hint; expands to nothing on this platform */
+#define PLD(x)
+
+#define INLINE
+
+#endif /* __IMPEG2_PLATFORM_MACROS_H__ */
diff --git a/common/x86/impeg2_idct_recon_sse42_intr.c b/common/x86/impeg2_idct_recon_sse42_intr.c
new file mode 100755
index 0000000..4142032
--- /dev/null
+++ b/common/x86/impeg2_idct_recon_sse42_intr.c
@@ -0,0 +1,2205 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * impeg2_idct_recon_sse42_intr.c + * + * @brief + * Contains function definitions for inverse quantization, inverse + * transform and reconstruction + * + * @author + * 100470 + * 100592 (edited by) + * + * @par List of Functions: + * - impeg2_idct_recon_sse42() + * + * @remarks + * None + * + ******************************************************************************* + */ +#include <stdio.h> +#include <string.h> +#include "iv_datatypedef.h" +#include "impeg2_macros.h" +#include "impeg2_defs.h" +#include "impeg2_globals.h" + +#include <immintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> +#include <tmmintrin.h> + + +/** + ******************************************************************************* + * + * @brief + * This function performs inverse quantization, inverse transform and + * reconstruction for 8x8 input block + * + * @par Description: + * Performs inverse quantization, inverse transform and adds the + * prediction data and clips output to 8 bit + * + * @param[in] pi2_src + * Input 8x8 coefficients + * + * @param[in] pi2_tmp + * Temporary 8x8 buffer for storing inverse + * transform 1st stage output + * + * @param[in] pu1_pred + * Prediction 8x8 block + * + * @param[in] pi2_dequant_coeff + * Dequant Coeffs + * + * @param[out] pu1_dst + * Output 8x8 
block + * + * @param[in] src_strd + * Input stride + * + * @param[in] qp_div + * Quantization parameter / 6 + * + * @param[in] qp_rem + * Quantization parameter % 6 + * + * @param[in] pred_strd + * Prediction stride + * + * @param[in] dst_strd + * Output Stride + * + * @param[in] zero_cols + * Zero columns in pi2_src + * + * @returns Void + * + * @remarks + * None + * + ******************************************************************************* + */ + + +void impeg2_idct_recon_sse42(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + __m128i m_temp_reg_0; + __m128i m_temp_reg_1; + __m128i m_temp_reg_2; + __m128i m_temp_reg_3; + __m128i m_temp_reg_5; + __m128i m_temp_reg_6; + __m128i m_temp_reg_7; + __m128i m_temp_reg_4; + __m128i m_temp_reg_10; + __m128i m_temp_reg_11; + __m128i m_temp_reg_12; + __m128i m_temp_reg_13; + __m128i m_temp_reg_14; + __m128i m_temp_reg_15; + __m128i m_temp_reg_16; + __m128i m_temp_reg_17; + __m128i m_temp_reg_20; + __m128i m_temp_reg_21; + __m128i m_temp_reg_22; + __m128i m_temp_reg_23; + __m128i m_temp_reg_24; + __m128i m_temp_reg_25; + __m128i m_temp_reg_26; + __m128i m_temp_reg_27; + __m128i m_temp_reg_30; + __m128i m_temp_reg_31; + __m128i m_temp_reg_32; + __m128i m_temp_reg_33; + __m128i m_temp_reg_34; + __m128i m_temp_reg_35; + __m128i m_temp_reg_36; + __m128i m_temp_reg_37; + __m128i m_temp_reg_40; + __m128i m_temp_reg_41; + __m128i m_temp_reg_42; + __m128i m_temp_reg_43; + __m128i m_temp_reg_44; + __m128i m_temp_reg_45; + __m128i m_temp_reg_46; + __m128i m_temp_reg_47; + __m128i m_temp_reg_50; + __m128i m_temp_reg_51; + __m128i m_temp_reg_52; + __m128i m_temp_reg_53; + __m128i m_temp_reg_54; + __m128i m_temp_reg_55; + __m128i m_temp_reg_56; + __m128i m_temp_reg_57; + __m128i m_temp_reg_60; + __m128i m_temp_reg_61; + __m128i m_temp_reg_62; + __m128i m_temp_reg_63; + __m128i m_temp_reg_64; + __m128i 
m_temp_reg_65; + __m128i m_temp_reg_66; + __m128i m_temp_reg_67; + __m128i m_temp_reg_70; + __m128i m_temp_reg_71; + __m128i m_temp_reg_72; + __m128i m_temp_reg_73; + __m128i m_temp_reg_74; + __m128i m_temp_reg_75; + __m128i m_temp_reg_76; + __m128i m_temp_reg_77; + __m128i m_coeff1, m_coeff2, m_coeff3, m_coeff4; + + WORD32 check_row_stage_1; /* Lokesh */ + WORD32 check_row_stage_2; /* Lokesh */ + + __m128i m_rdng_factor; + WORD32 i4_shift = IDCT_STG1_SHIFT; + UNUSED(pi2_tmp); + check_row_stage_1 = ((zero_rows & 0xF0) != 0xF0) ? 1 : 0; + check_row_stage_2 = ((zero_cols & 0xF0) != 0xF0) ? 1 : 0; + + m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + + m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_src); + pi2_src += src_strd; + m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_src); + + if(!check_row_stage_2) + { + if(!check_row_stage_1) + { + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + //Interleaving 0,4 row in 0 , 1 Rishab + /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/ + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]); + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]); + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the 
registers m_temp_reg_16 and m_temp_reg_17 */ + /* as upper 8 bytes are zeros so m_temp_reg_15 and m_temp_reg_17 are not used*/ + { + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83 + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36 + + /* Combining instructions to eliminate them based on zero_rows : Lokesh */ + //Interleaving 2,6 row in 4, 5 Rishab + m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + + + /* Loading coeff for computing o0, o1, o2 and o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[3][0]); + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]); + + + + /* e */ + + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + } + + /* o */ + { + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + + m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); + //o0:1B*89+3B*75,5B*50+7B*18 + m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + + + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of 
destination computed here */ + /* It is stored in m_temp_reg_57 */ + /* Upper 8 bytes of both registers are zero due to zero_cols*/ + + + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_setzero_si128(); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o1:1B*75-3B*18,5B*89+7B*50 + m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + /* Loading coeff for computing o2 in the next block */ + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]); + + /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */ + + + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o2:1B*50-3B*89,5B*18+7B*75 + m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i 
*)&gai2_impeg2_idct_odd_8_q15[6][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]); + + + + /* Column 2 of destination computed here */ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o3:1B*18-3B*50,5B*75-7B*89 + m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + } + } + + /* Transpose of the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, 
m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + + m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + + m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_54 = _mm_setzero_si128(); + m_temp_reg_55 = _mm_setzero_si128(); + m_temp_reg_56 = _mm_setzero_si128(); + m_temp_reg_57 = _mm_setzero_si128(); + } + } + else + { + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + //Interleaving 0,4 row in 0 , 1 Rishab + /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/ + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]); + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]); + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */ + /* as upper 8 bytes are zeros so m_temp_reg_15 and m_temp_reg_17 are not used*/ + { + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83 + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36 + + /* Combining instructions to eliminate them based on zero_rows : Lokesh */ + //Interleaving 2,6 row in 4, 5 Rishab 
+ m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + + + /* Loading coeff for computing o0, o1, o2 and o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[3][0]); + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]); + + + + /* e */ + + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + } + + /* o */ + { + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + + m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); + m_temp_reg_64 = _mm_unpacklo_epi16(m_temp_reg_75, m_temp_reg_77); + //o0:1B*89+3B*75,5B*50+7B*18 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2); + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + + m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + + + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of destination computed here */ + /* It is stored in m_temp_reg_57 */ + /* Upper 8 bytes of both registers are zero due to zero_cols*/ + + + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_62 = 
_mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_setzero_si128(); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o1:1B*75-3B*18,5B*89+7B*50 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + /* Loading coeff for computing o2 in the next block */ + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]); + + /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */ + m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26); + + + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o2:1B*50-3B*89,5B*18+7B*75 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]); + + 
m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + + + /* Column 2 of destination computed here */ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + //o3:1B*18-3B*50,5B*75-7B*89 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4); + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26); + + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63); + } + } + + /* Transpose of the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, 
m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + + m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + + m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_54 = _mm_setzero_si128(); + m_temp_reg_55 = _mm_setzero_si128(); + m_temp_reg_56 = _mm_setzero_si128(); + m_temp_reg_57 = _mm_setzero_si128(); + } + } + + /* Stage 2 */ + i4_shift = IDCT_STG2_SHIFT; + { + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[0][0]); //add + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[3][0]); //sub + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_54); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_54); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[1][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[2][0]); + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */ + { + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_56); + m_temp_reg_1 = 
_mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_56); + + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + + /* Loading coeff for computing o0 in the next block */ + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[0][0]); + + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_51, m_temp_reg_53); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_51, m_temp_reg_53); + + + + /* e */ + + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17); + m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17); + + m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15); + m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15); + + } + + /* o */ + { + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + //o0:1B*89+3B*75,1T*89+3T*75 + m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + /* Loading coeff for computing o1 in the next block */ + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[2][0]); + + + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of destination computed here */ + /* It is stored in m_temp_reg_57 */ + + m_temp_reg_2 = 
_mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + //o1:1B*75-3B*18,1T*75-3T*18 + m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_0, m_coeff3); + m_temp_reg_33 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */ + + + /* Loading coeff for computing o2 in the next block */ + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[4][0]); + + + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + //o2:1B*50-3B*89,5T*18+7T*75. 
+ m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_35 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[6][0]); + + + /* Column 2 of destination computed here */ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + //o3:1B*18-3B*50,1T*18-3T*50 + m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_0, m_coeff3); + m_temp_reg_37 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* 
Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + + m_temp_reg_20 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_22 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_21 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37); + m_temp_reg_23 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37); + + m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_rdng_factor); + m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_rdng_factor); + m_temp_reg_22 = _mm_add_epi32(m_temp_reg_22, m_rdng_factor); + m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_rdng_factor); + + m_temp_reg_20 = _mm_srai_epi32(m_temp_reg_20, i4_shift); + m_temp_reg_21 = _mm_srai_epi32(m_temp_reg_21, i4_shift); + m_temp_reg_22 = _mm_srai_epi32(m_temp_reg_22, i4_shift); + m_temp_reg_23 = _mm_srai_epi32(m_temp_reg_23, i4_shift); + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_20, m_temp_reg_21); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_22, m_temp_reg_23); + } + } + + /* Transpose of the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15); + m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15); + + m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, 
m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_10 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_11 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_12 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_13 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_14 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_15 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_16 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7); + m_temp_reg_17 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7); + } + + /* Recon and store */ + { + m_temp_reg_0 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_1 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_2 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_3 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_4 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_5 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_6 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_7 = _mm_loadl_epi64((__m128i *)pu1_pred); + + m_temp_reg_50 = _mm_setzero_si128(); + m_temp_reg_0 = _mm_unpacklo_epi8(m_temp_reg_0, m_temp_reg_50); + m_temp_reg_1 = _mm_unpacklo_epi8(m_temp_reg_1, m_temp_reg_50); + m_temp_reg_2 = _mm_unpacklo_epi8(m_temp_reg_2, m_temp_reg_50); + m_temp_reg_3 = _mm_unpacklo_epi8(m_temp_reg_3, m_temp_reg_50); + m_temp_reg_4 = _mm_unpacklo_epi8(m_temp_reg_4, m_temp_reg_50); + m_temp_reg_5 = _mm_unpacklo_epi8(m_temp_reg_5, m_temp_reg_50); + m_temp_reg_6 = _mm_unpacklo_epi8(m_temp_reg_6, m_temp_reg_50); + m_temp_reg_7 = _mm_unpacklo_epi8(m_temp_reg_7, m_temp_reg_50); + + m_temp_reg_50 = 
_mm_add_epi16(m_temp_reg_10, m_temp_reg_0); + m_temp_reg_51 = _mm_add_epi16(m_temp_reg_11, m_temp_reg_1); + m_temp_reg_52 = _mm_add_epi16(m_temp_reg_12, m_temp_reg_2); + m_temp_reg_53 = _mm_add_epi16(m_temp_reg_13, m_temp_reg_3); + m_temp_reg_54 = _mm_add_epi16(m_temp_reg_14, m_temp_reg_4); + m_temp_reg_55 = _mm_add_epi16(m_temp_reg_15, m_temp_reg_5); + m_temp_reg_56 = _mm_add_epi16(m_temp_reg_16, m_temp_reg_6); + m_temp_reg_57 = _mm_add_epi16(m_temp_reg_17, m_temp_reg_7); + + m_temp_reg_50 = _mm_packus_epi16(m_temp_reg_50, m_temp_reg_50); + m_temp_reg_51 = _mm_packus_epi16(m_temp_reg_51, m_temp_reg_51); + m_temp_reg_52 = _mm_packus_epi16(m_temp_reg_52, m_temp_reg_52); + m_temp_reg_53 = _mm_packus_epi16(m_temp_reg_53, m_temp_reg_53); + m_temp_reg_54 = _mm_packus_epi16(m_temp_reg_54, m_temp_reg_54); + m_temp_reg_55 = _mm_packus_epi16(m_temp_reg_55, m_temp_reg_55); + m_temp_reg_56 = _mm_packus_epi16(m_temp_reg_56, m_temp_reg_56); + m_temp_reg_57 = _mm_packus_epi16(m_temp_reg_57, m_temp_reg_57); + + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_50); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_51); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_52); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_53); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_54); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_55); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_56); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_57); + pu1_dst += dst_strd; + } + } + } + else + + { + + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + if(!check_row_stage_1) + { + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + //Interleaving 
0,4 row in 0 , 1 Rishab + /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/ + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]); + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]); + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_70, m_temp_reg_74); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + + + m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */ + { + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83 + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36 + + /* Combining instructions to eliminate them based on zero_rows : Lokesh */ + //Interleaving 2,6 row in 4, 5 Rishab + m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); + m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_72, m_temp_reg_76); + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + + m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_5, m_coeff1); + m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_5, m_coeff2); + + + + /* Loading coeff for computing o0, o1, o2 and o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]); + //m_coeff4 = _mm_loadu_si128((__m128i *) &gai2_impeg2_idct_odd_8_q15[3][0]); + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]); + //m_coeff2 = _mm_loadu_si128((__m128i *) &gai2_impeg2_idct_odd_8_q15[1][0]); + + } + + /* e */ + { + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in 
m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17); + m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17); + + m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15); + m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15); + + } + + /* o */ + { + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + + m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); + m_temp_reg_61 = _mm_unpackhi_epi16(m_temp_reg_71, m_temp_reg_73); + //o0:1B*89+3B*75,1T*89+3T*75 + m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_61, m_coeff1); + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + + } + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of destination computed here */ + /* It is stored in m_temp_reg_57 */ + { + + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = 
_mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o1:1B*75-3B*18,1T*75-3T*18,5B*89+7B*50,5T*89+7T*50 + m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_33 = _mm_madd_epi16(m_temp_reg_61, m_coeff3); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + /* Loading coeff for computing o2 in the next block */ + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]); + + } + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o2:1B*50-3B*89,1T*50-3T*89 + m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_35 = _mm_madd_epi16(m_temp_reg_61, m_coeff1); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]); + + } + + /* Column 2 of destination computed here 
*/ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o3:1B*18-3B*50,1T*18-3T*50 + m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_37 = _mm_madd_epi16(m_temp_reg_61, m_coeff3); + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + + } + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = 
_mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + } + } + + /* Transpose of the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + + + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15); + m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15); + + m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17); + + m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_54 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_55 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_56 = _mm_unpacklo_epi64(m_temp_reg_3, 
m_temp_reg_7); + m_temp_reg_57 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7); + } + } + else + { + + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + //Interleaving 0,4 row in 0 , 1 Rishab + /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/ + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]); + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]); + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_70, m_temp_reg_74); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + + + m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */ + { + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83 + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36 + + /* Combining instructions to eliminate them based on zero_rows : Lokesh */ + //Interleaving 2,6 row in 4, 5 Rishab + m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); + m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_72, m_temp_reg_76); + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + + m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_5, m_coeff1); + m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_5, m_coeff2); + + + + /* Loading coeff for computing o0, o1, o2 and o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]); + m_coeff4 = _mm_loadu_si128((__m128i 
*)&gai2_impeg2_idct_odd_8_q15[3][0]); + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]); + + } + + /* e */ + { + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17); + m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17); + + m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15); + m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15); + + } + + /* o */ + { + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + + m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); + m_temp_reg_61 = _mm_unpackhi_epi16(m_temp_reg_71, m_temp_reg_73); + m_temp_reg_64 = _mm_unpacklo_epi16(m_temp_reg_75, m_temp_reg_77); + m_temp_reg_65 = _mm_unpackhi_epi16(m_temp_reg_75, m_temp_reg_77); + //o0:1B*89+3B*75,1T*89+3T*75,5B*50+7B*18,5T*50+7T*18 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_61, m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2); + m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_65, m_coeff2); + + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + + m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + m_temp_reg_31 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25); + } + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of destination computed here */ + /* It is stored in 
m_temp_reg_57 */ + { + + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o1:1B*75-3B*18,1T*75-3T*18,5B*89+7B*50,5T*89+7T*50 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4); + m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_61, m_coeff3); + m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_65, m_coeff4); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + /* Loading coeff for computing o2 in the next block */ + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]); + + /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */ + m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26); + m_temp_reg_33 = _mm_sub_epi32(m_temp_reg_23, m_temp_reg_27); + } + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33); + m_temp_reg_67 = 
_mm_sub_epi32(m_temp_reg_43, m_temp_reg_33); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o2:1B*50-3B*89,1T*50-3T*89,5B*18+7B*75,5T*18+7T*75 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2); + m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_61, m_coeff1); + m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_65, m_coeff2); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]); + + m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + m_temp_reg_35 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25); + } + + /* Column 2 of destination computed here */ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + 
m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + //o3:1B*18-3B*50,1T*18-3T*50,5B*75-7B*89,5T*75-7T*89 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4); + m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_61, m_coeff3); + m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_65, m_coeff4); + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + + m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26); + m_temp_reg_37 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_27); + } + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + { + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37); + m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37); + + m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor); + m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor); + m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor); + m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor); + + m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift); + m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift); + m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift); + m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift); + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67); + } + } + + /* Transpose of 
the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + + + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15); + m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15); + + m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17); + + m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_54 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_55 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_56 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7); + m_temp_reg_57 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7); + } + } + /* Stage 2 */ + + i4_shift = IDCT_STG2_SHIFT; + + { + + /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */ + /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */ + { + m_coeff1 = 
_mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[0][0]); //add + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[3][0]); //sub + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_54); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_54); + + m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + + + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[1][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[2][0]); + } + + + /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */ + /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */ + { + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_56); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_56); + + + m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); + m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_1, m_coeff2); + + /* Loading coeff for computing o0 in the next block */ + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[0][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[1][0]); + + + m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_51, m_temp_reg_53); + m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_51, m_temp_reg_53); + } + + /* e */ + { + /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */ + /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */ + /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */ + /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */ + m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16); + m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16); + + m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14); + m_temp_reg_46 = 
_mm_sub_epi32(m_temp_reg_10, m_temp_reg_14); + + m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17); + m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17); + + m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15); + m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15); + + } + + /* o */ + { + m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_55, m_temp_reg_57); + m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_55, m_temp_reg_57); + + /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */ + { + //o0:1B*89+3B*75,1T*89+3T*75,5B*50+7B*18,5T*50+7T*18 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); + m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_5, m_coeff2); + + m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1))); + m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000); + /* Loading coeff for computing o1 in the next block */ + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[2][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[3][0]); + + m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + m_temp_reg_31 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25); + } + + /* Column 0 of destination computed here */ + /* It is stored in m_temp_reg_50 */ + /* Column 7 of destination computed here */ + /* It is stored in m_temp_reg_57 */ + { + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + 
m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + //o1:1B*75-3B*18,1T*75-3T*18,5B*89+7B*50,5T*89+7T*50 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_4, m_coeff4); + m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); + m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_5, m_coeff4); + + m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */ + + + /* Loading coeff for computing o2 in the next block */ + m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[4][0]); + m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[5][0]); + + m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26); + m_temp_reg_33 = _mm_sub_epi32(m_temp_reg_23, m_temp_reg_27); + } + + /* Column 1 of destination computed here */ + /* It is stored in m_temp_reg_51 */ + /* Column 6 of destination computed here */ + /* It is stored in m_temp_reg_56 */ + { + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + //o2:1B*50-3B*89,1T*50-3T*89,5B*18+7B*75,5T*18+7T*75 + m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, 
m_coeff1); + m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); + m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_1, m_coeff1); + m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_5, m_coeff2); + + m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */ + + /* Loading coeff for computing o3 in the next block */ + + m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[6][0]); + m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[7][0]); + + m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24); + m_temp_reg_35 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25); + } + + /* Column 2 of destination computed here */ + /* It is stored in m_temp_reg_52 */ + /* Column 5 of destination computed here */ + /* It is stored in m_temp_reg_55 */ + { + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34); + m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34); + + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35); + m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35); + + m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor); + m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor); + m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor); + m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor); + + m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift); + m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift); + m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift); + m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift); + + //o3:1B*18-3B*50,1T*18-3T*50,5B*75-7B*89,5T*75-7T*89 + m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff3); + m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_4, m_coeff4); + m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); + m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_5, m_coeff4); + + m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3); + m_temp_reg_55 = 
_mm_packs_epi32(m_temp_reg_6, m_temp_reg_7); + + + + /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */ + + + m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26); + m_temp_reg_37 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_27); + } + + /* Column 3 of destination computed here */ + /* It is stored in m_temp_reg_53 */ + /* Column 4 of destination computed here */ + /* It is stored in m_temp_reg_54 */ + { + m_temp_reg_20 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36); + m_temp_reg_22 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36); + + m_temp_reg_21 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37); + m_temp_reg_23 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37); + + m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_rdng_factor); + m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_rdng_factor); + m_temp_reg_22 = _mm_add_epi32(m_temp_reg_22, m_rdng_factor); + m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_rdng_factor); + + m_temp_reg_20 = _mm_srai_epi32(m_temp_reg_20, i4_shift); + m_temp_reg_21 = _mm_srai_epi32(m_temp_reg_21, i4_shift); + m_temp_reg_22 = _mm_srai_epi32(m_temp_reg_22, i4_shift); + m_temp_reg_23 = _mm_srai_epi32(m_temp_reg_23, i4_shift); + + m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_20, m_temp_reg_21); + m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_22, m_temp_reg_23); + } + } + + /* Transpose of the destination 8x8 matrix done here */ + /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */ + /* respectively */ + { + m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51); + m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53); + m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11); + m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15); + m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15); + 
+ m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55); + m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57); + m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13); + m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17); + m_temp_reg_10 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_11 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4); + m_temp_reg_12 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5); + m_temp_reg_13 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5); + + m_temp_reg_14 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_15 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6); + m_temp_reg_16 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7); + m_temp_reg_17 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7); + } + + /* Recon and store */ + { + m_temp_reg_0 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_1 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_2 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_3 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_4 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_5 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_6 = _mm_loadl_epi64((__m128i *)pu1_pred); + pu1_pred += pred_strd; + m_temp_reg_7 = _mm_loadl_epi64((__m128i *)pu1_pred); + + + m_temp_reg_50 = _mm_setzero_si128(); + m_temp_reg_0 = _mm_unpacklo_epi8(m_temp_reg_0, m_temp_reg_50); + m_temp_reg_1 = _mm_unpacklo_epi8(m_temp_reg_1, m_temp_reg_50); + m_temp_reg_2 = _mm_unpacklo_epi8(m_temp_reg_2, m_temp_reg_50); + m_temp_reg_3 = 
_mm_unpacklo_epi8(m_temp_reg_3, m_temp_reg_50); + m_temp_reg_4 = _mm_unpacklo_epi8(m_temp_reg_4, m_temp_reg_50); + m_temp_reg_5 = _mm_unpacklo_epi8(m_temp_reg_5, m_temp_reg_50); + m_temp_reg_6 = _mm_unpacklo_epi8(m_temp_reg_6, m_temp_reg_50); + m_temp_reg_7 = _mm_unpacklo_epi8(m_temp_reg_7, m_temp_reg_50); + + m_temp_reg_50 = _mm_add_epi16(m_temp_reg_10, m_temp_reg_0); + m_temp_reg_51 = _mm_add_epi16(m_temp_reg_11, m_temp_reg_1); + m_temp_reg_52 = _mm_add_epi16(m_temp_reg_12, m_temp_reg_2); + m_temp_reg_53 = _mm_add_epi16(m_temp_reg_13, m_temp_reg_3); + m_temp_reg_54 = _mm_add_epi16(m_temp_reg_14, m_temp_reg_4); + m_temp_reg_55 = _mm_add_epi16(m_temp_reg_15, m_temp_reg_5); + m_temp_reg_56 = _mm_add_epi16(m_temp_reg_16, m_temp_reg_6); + m_temp_reg_57 = _mm_add_epi16(m_temp_reg_17, m_temp_reg_7); + + m_temp_reg_50 = _mm_packus_epi16(m_temp_reg_50, m_temp_reg_50); + m_temp_reg_51 = _mm_packus_epi16(m_temp_reg_51, m_temp_reg_51); + m_temp_reg_52 = _mm_packus_epi16(m_temp_reg_52, m_temp_reg_52); + m_temp_reg_53 = _mm_packus_epi16(m_temp_reg_53, m_temp_reg_53); + m_temp_reg_54 = _mm_packus_epi16(m_temp_reg_54, m_temp_reg_54); + m_temp_reg_55 = _mm_packus_epi16(m_temp_reg_55, m_temp_reg_55); + m_temp_reg_56 = _mm_packus_epi16(m_temp_reg_56, m_temp_reg_56); + m_temp_reg_57 = _mm_packus_epi16(m_temp_reg_57, m_temp_reg_57); + + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_50); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_51); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_52); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_53); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_54); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_55); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_56); + pu1_dst += dst_strd; + _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_57); + pu1_dst += dst_strd; + + } + + + } + + + } +} + +void 
impeg2_idct_recon_dc_mismatch_sse42(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 val; + __m128i value_4x32b, mismatch_stg2_additive; + __m128i pred_r, pred_half0, pred_half1; + __m128i temp0, temp1; + __m128i round_stg2 = _mm_set1_epi32(IDCT_STG2_ROUND); + + UNUSED(pi2_tmp); + UNUSED(src_strd); + UNUSED(zero_cols); + UNUSED(zero_rows); + + val = pi2_src[0] * gai2_impeg2_idct_q15[0]; + val = ((val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); + val *= gai2_impeg2_idct_q11[0]; + value_4x32b = _mm_set1_epi32(val); + + // Row 0 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) gai2_impeg2_mismatch_stg2_additive); + pred_r = _mm_loadl_epi64((__m128i *) pu1_pred); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)pu1_dst, temp0); + + // Row 1 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 8)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = 
_mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp0); + + // Row 2 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 16)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 2 * pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), temp0); + + // Row 3 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 24)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 3 * pred_strd)); + pred_r = 
_mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), temp0); + + // Row 4 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 32)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 4 * pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), temp0); + + // Row 5 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) 
(gai2_impeg2_mismatch_stg2_additive + 40)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 5 * pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), temp0); + + // Row 6 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 48)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 6 * pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i 
*)(pu1_dst + 6 * dst_strd), temp0); + + // Row 7 processing + mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 56)); + pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 7 * pred_strd)); + pred_r = _mm_cvtepu8_epi16(pred_r); + temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8); + pred_half0 = _mm_cvtepu16_epi32(pred_r); + temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive); + + pred_r = _mm_srli_si128(pred_r, 8); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp0 = _mm_add_epi32(temp0, round_stg2); + temp1 = _mm_add_epi32(temp1, round_stg2); + pred_half1 = _mm_cvtepu16_epi32(pred_r); + temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT); + temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT); + temp0 = _mm_add_epi32(temp0, pred_half0); + temp1 = _mm_add_epi32(temp1, pred_half1); + + temp0 = _mm_packus_epi32(temp0, temp1); + temp0 = _mm_packus_epi16(temp0, temp1); + + _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), temp0); +} + +void impeg2_idct_recon_dc_sse42(WORD16 *pi2_src, + WORD16 *pi2_tmp, + UWORD8 *pu1_pred, + UWORD8 *pu1_dst, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + WORD32 zero_cols, + WORD32 zero_rows) +{ + WORD32 val; + __m128i value_4x32b, pred_r0, pred_r1, temp0, temp1, temp2, temp3; + + UNUSED(pi2_tmp); + UNUSED(src_strd); + UNUSED(zero_cols); + UNUSED(zero_rows); + + val = pi2_src[0] * gai2_impeg2_idct_q15[0]; + val = ((val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); + val = val * gai2_impeg2_idct_q11[0]; + val = ((val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); + + value_4x32b = _mm_set1_epi32(val); + + //Row 0-1 processing + pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred); + pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd)); + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + + temp0 = _mm_cvtepu16_epi32(pred_r0); + pred_r0 = 
_mm_srli_si128(pred_r0, 8); + temp2 = _mm_cvtepu16_epi32(pred_r1); + pred_r1 = _mm_srli_si128(pred_r1, 8); + temp1 = _mm_cvtepu16_epi32(pred_r0); + temp3 = _mm_cvtepu16_epi32(pred_r1); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp2 = _mm_add_epi32(temp2, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp3 = _mm_add_epi32(temp3, value_4x32b); + temp0 = _mm_packus_epi32(temp0, temp1); + temp2 = _mm_packus_epi32(temp2, temp3); + temp0 = _mm_packus_epi16(temp0, temp1); + temp2 = _mm_packus_epi16(temp2, temp3); + _mm_storel_epi64((__m128i *)(pu1_dst), temp0); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2); + + //Row 2-3 processing + pu1_pred += 2 * pred_strd; + pu1_dst += 2 * dst_strd; + + pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred); + pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd)); + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + + temp0 = _mm_cvtepu16_epi32(pred_r0); + pred_r0 = _mm_srli_si128(pred_r0, 8); + temp2 = _mm_cvtepu16_epi32(pred_r1); + pred_r1 = _mm_srli_si128(pred_r1, 8); + temp1 = _mm_cvtepu16_epi32(pred_r0); + temp3 = _mm_cvtepu16_epi32(pred_r1); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp2 = _mm_add_epi32(temp2, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp3 = _mm_add_epi32(temp3, value_4x32b); + temp0 = _mm_packus_epi32(temp0, temp1); + temp2 = _mm_packus_epi32(temp2, temp3); + temp0 = _mm_packus_epi16(temp0, temp1); + temp2 = _mm_packus_epi16(temp2, temp3); + _mm_storel_epi64((__m128i *)(pu1_dst), temp0); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2); + + //Row 4-5 processing + pu1_pred += 2 * pred_strd; + pu1_dst += 2 * dst_strd; + + pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred); + pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd)); + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + + temp0 = _mm_cvtepu16_epi32(pred_r0); + pred_r0 = _mm_srli_si128(pred_r0, 8); + temp2 = 
_mm_cvtepu16_epi32(pred_r1); + pred_r1 = _mm_srli_si128(pred_r1, 8); + temp1 = _mm_cvtepu16_epi32(pred_r0); + temp3 = _mm_cvtepu16_epi32(pred_r1); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp2 = _mm_add_epi32(temp2, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp3 = _mm_add_epi32(temp3, value_4x32b); + temp0 = _mm_packus_epi32(temp0, temp1); + temp2 = _mm_packus_epi32(temp2, temp3); + temp0 = _mm_packus_epi16(temp0, temp1); + temp2 = _mm_packus_epi16(temp2, temp3); + _mm_storel_epi64((__m128i *)(pu1_dst), temp0); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2); + + //Row 6-7 processing + pu1_pred += 2 * pred_strd; + pu1_dst += 2 * dst_strd; + + pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred); + pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd)); + pred_r0 = _mm_cvtepu8_epi16(pred_r0); + pred_r1 = _mm_cvtepu8_epi16(pred_r1); + + temp0 = _mm_cvtepu16_epi32(pred_r0); + pred_r0 = _mm_srli_si128(pred_r0, 8); + temp2 = _mm_cvtepu16_epi32(pred_r1); + pred_r1 = _mm_srli_si128(pred_r1, 8); + temp1 = _mm_cvtepu16_epi32(pred_r0); + temp3 = _mm_cvtepu16_epi32(pred_r1); + + temp0 = _mm_add_epi32(temp0, value_4x32b); + temp2 = _mm_add_epi32(temp2, value_4x32b); + temp1 = _mm_add_epi32(temp1, value_4x32b); + temp3 = _mm_add_epi32(temp3, value_4x32b); + temp0 = _mm_packus_epi32(temp0, temp1); + temp2 = _mm_packus_epi32(temp2, temp3); + temp0 = _mm_packus_epi16(temp0, temp1); + temp2 = _mm_packus_epi16(temp2, temp3); + _mm_storel_epi64((__m128i *)(pu1_dst), temp0); + _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2); +} diff --git a/common/x86/impeg2_inter_pred_sse42_intr.c b/common/x86/impeg2_inter_pred_sse42_intr.c new file mode 100644 index 0000000..4599afa --- /dev/null +++ b/common/x86/impeg2_inter_pred_sse42_intr.c @@ -0,0 +1,899 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * impeg2_inter_pred_sse42_intr.c + * + * @brief + * Contains Motion compensation function definitions for MPEG2 decoder + * + * @author + * Mohit [100664] + * + * - impeg2_copy_mb_sse42() + * - impeg2_interpolate_sse42() + * - impeg2_mc_halfx_halfy_8x8_sse42() + * - impeg2_mc_halfx_fully_8x8_sse42() + * - impeg2_mc_fullx_halfy_8x8_sse42() + * - impeg2_mc_fullx_fully_8x8_sse42() + * + * @remarks + * None + * + ******************************************************************************* + */ +#include <stdio.h> +#include <string.h> +#include "iv_datatypedef.h" +#include "impeg2_macros.h" +#include "impeg2_defs.h" +#include "impeg2_inter_pred.h" + +#include <immintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> +#include <tmmintrin.h> + +/******************************************************************************* +* Function Name : impeg2_copy_mb +* +* Description : copies 3 components to the frame from mc_buf +* +* Arguments : +* src_buf : Source Buffer +* dst_buf : Destination Buffer +* src_wd : Source Width +* dst_wd : destination Width +* +* Values Returned : None +*******************************************************************************/ +void impeg2_copy_mb_sse42(yuv_buf_t 
*src_buf, + yuv_buf_t *dst_buf, + UWORD32 src_wd, + UWORD32 dst_wd) +{ + UWORD8 *src; + UWORD8 *dst; + __m128i src_r0, src_r1, src_r2, src_r3; + + /*******************************************************/ + /* copy Y */ + /*******************************************************/ + src = src_buf->pu1_y; + dst = dst_buf->pu1_y; + // Row 0-3 + src_r0 = _mm_loadu_si128((__m128i *) (src)); + src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); + src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); + src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); + + _mm_storeu_si128((__m128i *) dst, src_r0); + _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); + + // Row 4-7 + src += 4 * src_wd; + dst += 4 * dst_wd; + src_r0 = _mm_loadu_si128((__m128i *) (src)); + src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); + src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); + src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); + + _mm_storeu_si128((__m128i *) dst, src_r0); + _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); + + // Row 8-11 + src += 4 * src_wd; + dst += 4 * dst_wd; + src_r0 = _mm_loadu_si128((__m128i *) (src)); + src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); + src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); + src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); + + _mm_storeu_si128((__m128i *) dst, src_r0); + _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); + + // Row 12-15 + src += 4 * src_wd; + dst += 4 * dst_wd; + src_r0 = _mm_loadu_si128((__m128i *) (src)); + src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); + src_r2 = _mm_loadu_si128((__m128i *) (src 
+ 2 * src_wd)); + src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); + + _mm_storeu_si128((__m128i *) dst, src_r0); + _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); + + src_wd >>= 1; + dst_wd >>= 1; + + /*******************************************************/ + /* copy U */ + /*******************************************************/ + src = src_buf->pu1_u; + dst = dst_buf->pu1_u; + + // Row 0-3 + src_r0 = _mm_loadl_epi64((__m128i *)src); + src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); + src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); + src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); + + _mm_storel_epi64((__m128i *)dst, src_r0); + _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); + _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); + _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); + + // Row 4-7 + src += 4 * src_wd; + dst += 4 * dst_wd; + + src_r0 = _mm_loadl_epi64((__m128i *)src); + src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); + src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); + src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); + + _mm_storel_epi64((__m128i *)dst, src_r0); + _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); + _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); + _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); + + /*******************************************************/ + /* copy V */ + /*******************************************************/ + src = src_buf->pu1_v; + dst = dst_buf->pu1_v; + // Row 0-3 + src_r0 = _mm_loadl_epi64((__m128i *)src); + src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); + src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); + src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); + + _mm_storel_epi64((__m128i *)dst, src_r0); + _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); + 
_mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); + _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); + + // Row 4-7 + src += 4 * src_wd; + dst += 4 * dst_wd; + + src_r0 = _mm_loadl_epi64((__m128i *)src); + src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); + src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); + src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); + + _mm_storel_epi64((__m128i *)dst, src_r0); + _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); + _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); + _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_interpolate */ +/* */ +/* Description : averages the contents of buf_src1 and buf_src2 and stores*/ +/* result in buf_dst */ +/* */ +/* Inputs : buf_src1 - First Source */ +/* buf_src2 - Second Source */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Avg the values from two sources and store the result in */ +/* destination buffer */ +/* */ +/* Outputs : buf_dst - Avg of contents of buf_src1 and buf_src2 */ +/* */ +/* Returns : None */ +/* */ +/* Issues : Assumes that all 3 buffers are of same size */ +/* */ +/*****************************************************************************/ +void impeg2_interpolate_sse42(yuv_buf_t *buf_src1, + yuv_buf_t *buf_src2, + yuv_buf_t *buf_dst, + UWORD32 stride) +{ + UWORD8 *src1, *src2; + UWORD8 *dst; + __m128i src1_r0, src1_r1, src1_r2, src1_r3; + __m128i src2_r0, src2_r1, src2_r2, src2_r3; + + /*******************************************************/ + /* interpolate Y */ + /*******************************************************/ + src1 = buf_src1->pu1_y; + src2 = buf_src2->pu1_y; + dst = buf_dst->pu1_y; + // Row 0-3 + src1_r0 = _mm_loadu_si128((__m128i *) (src1)); + src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); + src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); + src1_r3 = 
_mm_loadu_si128((__m128i *) (src1 + 3 * 16)); + + src2_r0 = _mm_loadu_si128((__m128i *) (src2)); + src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); + src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); + src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storeu_si128((__m128i *) dst, src1_r0); + _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); + + // Row 4-7 + src1 += 4 * 16; + src2 += 4 * 16; + dst += 4 * stride; + src1_r0 = _mm_loadu_si128((__m128i *) (src1)); + src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); + src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); + src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); + + src2_r0 = _mm_loadu_si128((__m128i *) (src2)); + src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); + src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); + src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storeu_si128((__m128i *) dst, src1_r0); + _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); + + // Row 8-11 + src1 += 4 * 16; + src2 += 4 * 16; + dst += 4 * stride; + src1_r0 = _mm_loadu_si128((__m128i *) (src1)); + src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); + src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); + src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); + + src2_r0 = _mm_loadu_si128((__m128i *) (src2)); + src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); + 
src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); + src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storeu_si128((__m128i *) dst, src1_r0); + _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); + + // Row 12-15 + src1 += 4 * 16; + src2 += 4 * 16; + dst += 4 * stride; + src1_r0 = _mm_loadu_si128((__m128i *) (src1)); + src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); + src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); + src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); + + src2_r0 = _mm_loadu_si128((__m128i *) (src2)); + src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); + src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); + src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storeu_si128((__m128i *) dst, src1_r0); + _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); + _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); + + stride >>= 1; + + /*******************************************************/ + /* interpolate U */ + /*******************************************************/ + src1 = buf_src1->pu1_u; + src2 = buf_src2->pu1_u; + dst = buf_dst->pu1_u; + // Row 0-3 + src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); + src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); + src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); + src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); + + src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); + src2_r1 = 
_mm_loadl_epi64((__m128i *) (src2 + 8)); + src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); + src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storel_epi64((__m128i *) dst, src1_r0); + _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); + _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); + + // Row 4-7 + src1 += 4 * 8; + src2 += 4 * 8; + dst += 4 * stride; + + src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); + src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); + src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); + src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); + + src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); + src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); + src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); + src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storel_epi64((__m128i *) dst, src1_r0); + _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); + _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); + + /*******************************************************/ + /* interpolate V */ + /*******************************************************/ + src1 = buf_src1->pu1_v; + src2 = buf_src2->pu1_v; + dst = buf_dst->pu1_v; + + // Row 0-3 + src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); + src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); + src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); + src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); + + src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); + src2_r1 = 
_mm_loadl_epi64((__m128i *) (src2 + 8)); + src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); + src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storel_epi64((__m128i *) dst, src1_r0); + _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); + _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); + + // Row 4-7 + src1 += 4 * 8; + src2 += 4 * 8; + dst += 4 * stride; + + src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); + src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); + src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); + src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); + + src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); + src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); + src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); + src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); + + src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); + src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); + src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); + src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); + + _mm_storel_epi64((__m128i *) dst, src1_r0); + _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); + _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); + _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_halfx_halfy_8x8_sse42() */ +/* */ +/* Description : Gets the buffer from (0.5,0.5) to (8.5,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. 
*/ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0),(1,0),(0,1),(1,1) position in */ +/* the ref frame.Interpolate these four values to get the */ +/* value at(0.5,0.5).Repeat this to get an 8 x 8 block */ +/* using 9 x 9 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/*****************************************************************************/ +void impeg2_mc_halfx_halfy_8x8_sse42(UWORD8 *out, + UWORD8 *ref, + UWORD32 ref_wid, + UWORD32 out_wid) +{ + UWORD8 *ref_p0,*ref_p1,*ref_p2,*ref_p3; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 P1 + Q + P2 P3 + */ + __m128i src_r0, src_r0_1, src_r1, src_r1_1; + __m128i tmp0, tmp1; + __m128i value_2 = _mm_set1_epi16(2); + + ref_p0 = ref; + ref_p1 = ref + 1; + ref_p2 = ref + ref_wid; + ref_p3 = ref + ref_wid + 1; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 0 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); + src_r1 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 1 + src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + src_r1 = _mm_cvtepu8_epi16(src_r1); + src_r1_1 = _mm_cvtepu8_epi16(src_r1_1); + + tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 0 horizontal interpolation + tmp1 = _mm_add_epi16(src_r1, src_r1_1); //Row 1 horizontal interpolation + tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 0 vertical interpolation + tmp0 = _mm_add_epi16(tmp0, value_2); + tmp0 = _mm_srli_epi16(tmp0, 2); + tmp0 = _mm_packus_epi16(tmp0, value_2); + + _mm_storel_epi64((__m128i *)out, tmp0); + + //Row 1 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) 
(ref_p2)); //Row 2 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 2 horizontal interpolation + tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 1 vertical interpolation + tmp1 = _mm_add_epi16(tmp1, value_2); + tmp1 = _mm_srli_epi16(tmp1, 2); + tmp1 = _mm_packus_epi16(tmp1, value_2); + + _mm_storel_epi64((__m128i *)out, tmp1); + + //Row 2 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 3 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 3 horizontal interpolation + + tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 2 vertical interpolation + tmp0 = _mm_add_epi16(tmp0, value_2); + tmp0 = _mm_srli_epi16(tmp0, 2); + tmp0 = _mm_packus_epi16(tmp0, value_2); + + _mm_storel_epi64((__m128i *)out, tmp0); + + //Row 3 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 4 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 4 horizontal interpolation + + tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 3 vertical interpolation + tmp1 = _mm_add_epi16(tmp1, value_2); + tmp1 = _mm_srli_epi16(tmp1, 2); + tmp1 = _mm_packus_epi16(tmp1, value_2); + + _mm_storel_epi64((__m128i *)out, tmp1); + + //Row 4 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 5 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 5 horizontal interpolation + + tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 4 vertical 
interpolation + tmp0 = _mm_add_epi16(tmp0, value_2); + tmp0 = _mm_srli_epi16(tmp0, 2); + tmp0 = _mm_packus_epi16(tmp0, value_2); + + _mm_storel_epi64((__m128i *)out, tmp0); + + //Row 5 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 6 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 6 horizontal interpolation + + tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 5 vertical interpolation + tmp1 = _mm_add_epi16(tmp1, value_2); + tmp1 = _mm_srli_epi16(tmp1, 2); + tmp1 = _mm_packus_epi16(tmp1, value_2); + + _mm_storel_epi64((__m128i *)out, tmp1); + + //Row 6 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 7 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 7 horizontal interpolation + + tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 6 vertical interpolation + tmp0 = _mm_add_epi16(tmp0, value_2); + tmp0 = _mm_srli_epi16(tmp0, 2); + tmp0 = _mm_packus_epi16(tmp0, value_2); + + _mm_storel_epi64((__m128i *)out, tmp0); + + //Row 7 + ref_p2 += ref_wid; + ref_p3 += ref_wid; + out += out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 8 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); + + src_r0 = _mm_cvtepu8_epi16(src_r0); + src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); + + tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 8 horizontal interpolation + + tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 7 vertical interpolation + tmp1 = _mm_add_epi16(tmp1, value_2); + tmp1 = _mm_srli_epi16(tmp1, 2); + tmp1 = _mm_packus_epi16(tmp1, value_2); + + _mm_storel_epi64((__m128i *)out, tmp1); + + return; +} + +/*****************************************************************************/ +/* */ +/* 
Function Name : impeg2_mc_halfx_fully_8x8_sse42() */ +/* */ +/* Description : Gets the buffer from (0.5,0) to (8.5,8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. */ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (1,0) position in the ref frame */ +/* Interpolate these two values to get the value at(0.5,0) */ +/* Repeat this to get an 8 x 8 block using 9 x 8 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/*****************************************************************************/ +void impeg2_mc_halfx_fully_8x8_sse42(UWORD8 *out, + UWORD8 *ref, + UWORD32 ref_wid, + UWORD32 out_wid) +{ + UWORD8 *ref_p0,*ref_p1; + __m128i src_r0, src_r0_1, src_r1, src_r1_1; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 Q P1 + */ + + ref_p0 = ref; + ref_p1 = ref + 1; + + // Row 0 and 1 + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 0 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); + src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 1 + src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); + + src_r0 = _mm_avg_epu8(src_r0, src_r0_1); + src_r1 = _mm_avg_epu8(src_r1, src_r1_1); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + + // Row 2 and 3 + ref_p0 += 2*ref_wid; + ref_p1 += 2*ref_wid; + out += 2*out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 2 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); + src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 
+ ref_wid)); //Row 3 + src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); + + src_r0 = _mm_avg_epu8(src_r0, src_r0_1); + src_r1 = _mm_avg_epu8(src_r1, src_r1_1); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + + // Row 4 and 5 + ref_p0 += 2*ref_wid; + ref_p1 += 2*ref_wid; + out += 2*out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 4 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); + src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 5 + src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); + + src_r0 = _mm_avg_epu8(src_r0, src_r0_1); + src_r1 = _mm_avg_epu8(src_r1, src_r1_1); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + + // Row 6 and 7 + ref_p0 += 2*ref_wid; + ref_p1 += 2*ref_wid; + out += 2*out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 6 + src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); + src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 7 + src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); + + src_r0 = _mm_avg_epu8(src_r0, src_r0_1); + src_r1 = _mm_avg_epu8(src_r1, src_r1_1); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + + return; +} + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_fullx_halfy_8x8_sse42() */ +/* */ +/* Description : Gets the buffer from (0,0.5) to (8,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. 
*/ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (0,1) position in the ref frame */ +/* Interpolate these two values to get the value at(0,0.5) */ +/* Repeat this to get an 8 x 8 block using 8 x 9 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/*****************************************************************************/ +void impeg2_mc_fullx_halfy_8x8_sse42(UWORD8 *out, + UWORD8 *ref, + UWORD32 ref_wid, + UWORD32 out_wid) +{ + __m128i src_r0, src_r1, src_r2, temp0, temp1; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated */ + /* + P0 + x + P1 + */ + src_r0 = _mm_loadl_epi64((__m128i *)ref); //Row 0 + src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 1 + src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); //Row 2 + temp0 = _mm_avg_epu8(src_r0, src_r1); + temp1 = _mm_avg_epu8(src_r1, src_r2); + _mm_storel_epi64((__m128i *)out, temp0); //Row 0 + _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 1 + + ref+= 3*ref_wid; + out+= 2*out_wid; + + src_r0 = _mm_loadl_epi64((__m128i *)ref); //Row 3 + src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 4 + temp0 = _mm_avg_epu8(src_r2, src_r0); + temp1 = _mm_avg_epu8(src_r0, src_r1); + _mm_storel_epi64((__m128i *)out, temp0); //Row 2 + _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 3 + + ref += 2*ref_wid; + out+= 2*out_wid; + + src_r2 = _mm_loadl_epi64((__m128i *)ref); //Row 5 + src_r0 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 6 + temp0 = _mm_avg_epu8(src_r1, src_r2); + temp1 = _mm_avg_epu8(src_r2, src_r0); + _mm_storel_epi64((__m128i *)out, temp0); //Row 4 + _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 5 + + 
ref += 2*ref_wid; + out+= 2*out_wid; + + src_r1 = _mm_loadl_epi64((__m128i *)ref); //Row 7 + src_r2 = _mm_loadl_epi64((__m128i *) (ref + ref_wid)); //Row 8 + temp0 = _mm_avg_epu8(src_r0, src_r1); + temp1 = _mm_avg_epu8(src_r1, src_r2); + _mm_storel_epi64((__m128i *)out, temp0); //Row 6 + _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 7 + + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2_mc_fullx_fully_8x8_sse42() */ +/* */ +/* Description : Gets the buffer from (x,y) to (x+8,y+8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block will be */ +/* block will be extracted. */ +/* ref_wid - WIdth of reference frame */ +/* out_wid - WIdth of the output frame */ +/* blk_width - width of the block */ +/* blk_width - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) position in the ref frame */ +/* Get an 8 x 8 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/*****************************************************************************/ +void impeg2_mc_fullx_fully_8x8_sse42(UWORD8 *out, + UWORD8 *ref, + UWORD32 ref_wid, + UWORD32 out_wid) +{ + __m128i src_r0, src_r1, src_r2, src_r3; + // Row 0-3 + src_r0 = _mm_loadl_epi64((__m128i *)ref); + src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); + src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); + src_r3 = _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid)); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2); + _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3); + + // Row 4-7 + ref += 4 * ref_wid; + out += 4 * out_wid; + + src_r0 = 
_mm_loadl_epi64((__m128i *)ref); + src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); + src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); + src_r3 = _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid)); + + _mm_storel_epi64((__m128i *)out, src_r0); + _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); + _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2); + _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3); + return; +} diff --git a/common/x86/impeg2_mem_func_sse42_intr.c b/common/x86/impeg2_mem_func_sse42_intr.c new file mode 100644 index 0000000..de7de8f --- /dev/null +++ b/common/x86/impeg2_mem_func_sse42_intr.c @@ -0,0 +1,100 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * impeg2_mem_func_sse42_intr.c + * + * @brief + * Contains utility function definitions for MPEG2 codec + * + * @author + * Mohit [100664] + * +* @par List of Functions: +* - impeg2_memset0_16bit_8x8_linear_block_sse42() +* - impeg2_memset_8bit_8x8_block_sse42() + * + * @remarks + * None + * + ******************************************************************************* + */ +#include <stdio.h> +#include <string.h> +#include "iv_datatypedef.h" +#include "impeg2_defs.h" + +#include <immintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> +#include <tmmintrin.h> + +/******************************************************************************* +* Function Name : impeg2_memset0_16bit_8x8_linear_block +* +* Description : memsets resudial buf to 0 +* +* Arguments : destination buffer +* +* Values Returned : None +*******************************************************************************/ + + +void impeg2_memset0_16bit_8x8_linear_block_sse42 (WORD16 *buf) + { + __m128i zero_8x8_16b = _mm_set1_epi16(0); + _mm_storeu_si128((__m128i *) buf, zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 8), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 16), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 24), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 32), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 40), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 48), zero_8x8_16b); + _mm_storeu_si128((__m128i *) (buf + 56), zero_8x8_16b); +} + + + +/******************************************************************************* +* Function Name : impeg2_memset_8bit_8x8_block +* +* Description : memsets residual buf to value +* +* Arguments : destination buffer, value and stride +* +* Values Returned : None +*******************************************************************************/ + + +void 
impeg2_memset_8bit_8x8_block_sse42(UWORD8 *dst, WORD32 dc_val, WORD32 dst_wd) +{ + __m128i value = _mm_set1_epi8((WORD8)dc_val); + + _mm_storel_epi64((__m128i *)dst, value); + _mm_storel_epi64((__m128i *) (dst + dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 2 * dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 3 * dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 4 * dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 5 * dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 6 * dst_wd), value); + _mm_storel_epi64((__m128i *) (dst + 7 * dst_wd), value); +} diff --git a/common/x86/impeg2_platform_macros.h b/common/x86/impeg2_platform_macros.h new file mode 100644 index 0000000..05ff6da --- /dev/null +++ b/common/x86/impeg2_platform_macros.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef __IMPEG2_PLATFORM_MACROS_H__ +#define __IMPEG2_PLATFORM_MACROS_H__ + + +#define CONV_LE_TO_BE(u4_temp2,u4_temp1) u4_temp2 = (u4_temp1 << 24) | \ + ((u4_temp1 & 0xff00) << 8) | \ + ((u4_temp1 & 0xff0000) >> 8) | \ + (u4_temp1 >> 24); +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return (__builtin_clz(u4_word)); + else + return 32; +} + + +#define CLIP_U8(x) ((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)) +#define CLIP_S8(x) ((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)) + +#define CLIP_U12(x) ((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)) +#define CLIP_S12(x) ((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)) + +#define CLIP_U16(x) ((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)) +#define CLIP_S16(x) ((x) > 65535) ? (65535) : (((x) < -65536) ? (-65536) : (x)) +#define PLD(x) + +#define INLINE + +#endif /* __IMPEG2_PLATFORM_MACROS_H__ */ diff --git a/decoder.arm.mk b/decoder.arm.mk new file mode 100644 index 0000000..6e02ce8 --- /dev/null +++ b/decoder.arm.mk @@ -0,0 +1,22 @@ +libmpeg2d_inc_dir_arm += $(LOCAL_PATH)/decoder/arm +libmpeg2d_inc_dir_arm += $(LOCAL_PATH)/common/arm + +libmpeg2d_srcs_c_arm += decoder/arm/impeg2d_function_selector.c +libmpeg2d_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC + +LOCAL_ARM_MODE := arm + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libmpeg2d_srcs_c_arm += decoder/arm/impeg2d_function_selector_a9q.c +libmpeg2d_srcs_asm_arm += common/arm/impeg2_format_conv.s +libmpeg2d_srcs_asm_arm += common/arm/impeg2_idct.s +libmpeg2d_srcs_asm_arm += common/arm/impeg2_inter_pred.s +libmpeg2d_srcs_asm_arm += common/arm/impeg2_mem_func.s +libmpeg2d_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q +else +libmpeg2d_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif + +LOCAL_SRC_FILES_arm += $(libmpeg2d_srcs_c_arm) $(libmpeg2d_srcs_asm_arm) +LOCAL_C_INCLUDES_arm += $(libmpeg2d_inc_dir_arm) +LOCAL_CFLAGS_arm += $(libmpeg2d_cflags_arm) diff --git a/decoder.arm64.mk b/decoder.arm64.mk new 
file mode 100644 index 0000000..f3a36e6 --- /dev/null +++ b/decoder.arm64.mk @@ -0,0 +1,31 @@ +libmpeg2d_cflags_arm64 += -DARMV8 +libmpeg2d_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC + +libmpeg2d_inc_dir_arm64 += $(LOCAL_PATH)/decoder/arm +libmpeg2d_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8 + +libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector.c + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector_av8.c + +libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_neon_macros.s +libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_format_conv.s +libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_idct.s +libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_inter_pred.s +libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_mem_func.s +libmpeg2d_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC +else +libmpeg2d_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON +endif + + + +# Export only the *_arm64 variables collected above; DEFAULT_ARCH must travel in libmpeg2d_cflags_arm64 to reach the arm64 build. +LOCAL_SRC_FILES_arm64 += $(libmpeg2d_srcs_c_arm64) $(libmpeg2d_srcs_asm_arm64) +LOCAL_C_INCLUDES_arm64 += $(libmpeg2d_inc_dir_arm64) + +comma := , +LOCAL_ASFLAGS_arm64 += $(addprefix -Wa$(comma)-I,$(libmpeg2d_inc_dir_arm64)) + +LOCAL_CFLAGS_arm64 += $(libmpeg2d_cflags_arm64) diff --git a/decoder.mips.mk b/decoder.mips.mk new file mode 100644 index 0000000..514eb9c --- /dev/null +++ b/decoder.mips.mk @@ -0,0 +1,6 @@ +libmpeg2d_inc_dir_mips += $(LOCAL_PATH)/common/mips + +libmpeg2d_srcs_c_mips += decoder/mips/impeg2d_function_selector.c + +LOCAL_C_INCLUDES_mips += $(libmpeg2d_inc_dir_mips) +LOCAL_SRC_FILES_mips += $(libmpeg2d_srcs_c_mips) diff --git a/decoder.mips64.mk b/decoder.mips64.mk new file mode 100644 index 0000000..5a3bdbc --- /dev/null +++ b/decoder.mips64.mk @@ -0,0 +1,6 @@ +libmpeg2d_inc_dir_mips64 += $(LOCAL_PATH)/common/mips + +libmpeg2d_srcs_c_mips64 += decoder/mips/impeg2d_function_selector.c + +LOCAL_C_INCLUDES_mips64 += $(libmpeg2d_inc_dir_mips64) +LOCAL_SRC_FILES_mips64 += $(libmpeg2d_srcs_c_mips64) diff --git a/decoder.mk
b/decoder.mk new file mode 100644 index 0000000..7edccc2 --- /dev/null +++ b/decoder.mk @@ -0,0 +1,55 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +libmpeg2d_source_dir := $(LOCAL_PATH) + +## Arch-common settings +LOCAL_MODULE := libmpeg2dec +#LOCAL_32_BIT_ONLY := true + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_CFLAGS += -D_LIB -DMULTICORE -fPIC +LOCAL_CFLAGS += -O3 -DANDROID + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/decoder $(LOCAL_PATH)/common + +libmpeg2d_srcs_c += common/impeg2_buf_mgr.c +libmpeg2d_srcs_c += common/impeg2_disp_mgr.c +libmpeg2d_srcs_c += common/impeg2_format_conv.c +libmpeg2d_srcs_c += common/impeg2_globals.c +libmpeg2d_srcs_c += common/impeg2_idct.c +libmpeg2d_srcs_c += common/impeg2_inter_pred.c +libmpeg2d_srcs_c += common/impeg2_job_queue.c +libmpeg2d_srcs_c += common/impeg2_mem_func.c + +libmpeg2d_srcs_c += common/ithread.c + +libmpeg2d_srcs_c += decoder/impeg2d_api_main.c +libmpeg2d_srcs_c += decoder/impeg2d_bitstream.c +libmpeg2d_srcs_c += decoder/impeg2d_debug.c +libmpeg2d_srcs_c += decoder/impeg2d_dec_hdr.c +libmpeg2d_srcs_c += decoder/impeg2d_decoder.c +libmpeg2d_srcs_c += decoder/impeg2d_d_pic.c +libmpeg2d_srcs_c += decoder/impeg2d_function_selector_generic.c +libmpeg2d_srcs_c += decoder/impeg2d_globals.c +libmpeg2d_srcs_c += decoder/impeg2d_i_pic.c +libmpeg2d_srcs_c += decoder/impeg2d_mc.c +libmpeg2d_srcs_c += decoder/impeg2d_mv_dec.c +libmpeg2d_srcs_c += decoder/impeg2d_pic_proc.c +libmpeg2d_srcs_c += decoder/impeg2d_pnb_pic.c +libmpeg2d_srcs_c += decoder/impeg2d_vld.c +libmpeg2d_srcs_c += decoder/impeg2d_vld_tables.c + +LOCAL_SRC_FILES := $(libmpeg2d_srcs_c) $(libmpeg2d_srcs_asm) + + +# Load the arch-specific settings +include $(LOCAL_PATH)/decoder.arm.mk +include $(LOCAL_PATH)/decoder.arm64.mk +include $(LOCAL_PATH)/decoder.x86.mk +include $(LOCAL_PATH)/decoder.x86_64.mk +include $(LOCAL_PATH)/decoder.mips.mk +include $(LOCAL_PATH)/decoder.mips64.mk + +include $(BUILD_STATIC_LIBRARY) diff --git a/decoder.x86.mk 
b/decoder.x86.mk new file mode 100644 index 0000000..ff6344c --- /dev/null +++ b/decoder.x86.mk @@ -0,0 +1,21 @@ +libmpeg2d_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 + +libmpeg2d_inc_dir_x86 += $(LOCAL_PATH)/decoder/x86 +libmpeg2d_inc_dir_x86 += $(LOCAL_PATH)/common/x86 + +libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector.c +libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_avx2.c +libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_ssse3.c +libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_sse42.c + + +libmpeg2d_srcs_c_x86 += common/x86/impeg2_idct_recon_sse42_intr.c +libmpeg2d_srcs_c_x86 += common/x86/impeg2_inter_pred_sse42_intr.c +libmpeg2d_srcs_c_x86 += common/x86/impeg2_mem_func_sse42_intr.c + +LOCAL_SRC_FILES_x86 += $(libmpeg2d_srcs_c_x86) $(libmpeg2d_srcs_asm_x86) +LOCAL_C_INCLUDES_x86 += $(libmpeg2d_inc_dir_x86) +LOCAL_CFLAGS_x86 += $(libmpeg2d_cflags_x86) + + + diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk new file mode 100644 index 0000000..72c1820 --- /dev/null +++ b/decoder.x86_64.mk @@ -0,0 +1,21 @@ +libmpeg2d_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 + +libmpeg2d_inc_dir_x86_64 += $(LOCAL_PATH)/decoder/x86 +libmpeg2d_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86 + +libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector.c +libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_avx2.c +libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_ssse3.c +libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_sse42.c + + +libmpeg2d_srcs_c_x86_64 += common/x86/impeg2_idct_recon_sse42_intr.c +libmpeg2d_srcs_c_x86_64 += common/x86/impeg2_inter_pred_sse42_intr.c +libmpeg2d_srcs_c_x86_64 += common/x86/impeg2_mem_func_sse42_intr.c + +LOCAL_SRC_FILES_x86_64 += $(libmpeg2d_srcs_c_x86_64) $(libmpeg2d_srcs_asm_x86_64) +LOCAL_C_INCLUDES_x86_64 += $(libmpeg2d_inc_dir_x86_64) 
+LOCAL_CFLAGS_x86_64 += $(libmpeg2d_cflags_x86_64) + + + diff --git a/decoder/arm/impeg2d_function_selector.c b/decoder/arm/impeg2d_function_selector.c new file mode 100644 index 0000000..d43e060 --- /dev/null +++ b/decoder/arm/impeg2d_function_selector.c @@ -0,0 +1,119 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_mc.h" + +void impeg2d_init_function_ptr_generic(void *pv_codec); +void impeg2d_init_function_ptr_a9q(void *pv_codec); +#ifdef ARMV8 +void impeg2d_init_function_ptr_av8(void *pv_codec); +#endif /* ARMV8 */ + +void impeg2d_init_function_ptr(void *pv_codec) +{ + dec_state_t *ps_codec = (dec_state_t *)pv_codec; + IVD_ARCH_T e_proc_arch = ps_codec->e_processor_arch; + + switch(e_proc_arch) + { + case ARCH_ARM_NONEON: + impeg2d_init_function_ptr_generic(ps_codec); + break; +#ifndef ARMV8 + case ARCH_ARM_A5: + case ARCH_ARM_A7: + case ARCH_ARM_A9: + case ARCH_ARM_A15: + case ARCH_ARM_A9Q: + default: + impeg2d_init_function_ptr_a9q(ps_codec); + break; +#else /* ARMV8 */ + case ARCH_ARMV8_GENERIC: + default: + impeg2d_init_function_ptr_av8(ps_codec); + 
break; +#endif /* ARMV8 */ + } +} + +void impeg2d_init_arch(void *pv_codec) +{ + dec_state_t *ps_codec = (dec_state_t *)pv_codec; +#ifdef DEFAULT_ARCH +#if DEFAULT_ARCH == D_ARCH_ARM_NONEON + ps_codec->e_processor_arch = ARCH_ARM_NONEON; +#elif DEFAULT_ARCH == D_ARCH_ARMV8_GENERIC + ps_codec->e_processor_arch = ARCH_ARMV8_GENERIC; +#elif DEFAULT_ARCH == D_ARCH_ARM_NEONINTR + ps_codec->e_processor_arch = ARCH_ARM_NEONINTR; +#else + ps_codec->e_processor_arch = ARCH_ARM_A9Q; +#endif +#else + ps_codec->e_processor_arch = ARCH_ARM_A9Q; +#endif +} diff --git a/decoder/arm/impeg2d_function_selector_a9q.c b/decoder/arm/impeg2d_function_selector_a9q.c new file mode 100644 index 0000000..024145d --- /dev/null +++ b/decoder/arm/impeg2d_function_selector_a9q.c @@ -0,0 +1,100 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector_a9q.c +* +* @brief +* Contains functions to initialize a9q function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_mc.h" +/* Fills the decoder state's function-pointer table with the _a9q kernels (generic routines where no _a9q variant is assigned). pv_codec points to a dec_state_t; it is taken as void * so the definition matches the prototype used by the function selector. */ +void impeg2d_init_function_ptr_a9q(void *pv_codec) +{ + dec_state_t *dec = (dec_state_t *)pv_codec; + + dec->pf_idct_recon[0] = &impeg2_idct_recon_dc_a9q; + dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch_a9q; + dec->pf_idct_recon[2] = &impeg2_idct_recon_a9q; + dec->pf_idct_recon[3] = &impeg2_idct_recon_a9q; + + dec->pf_mc[0] = &impeg2d_mc_fullx_fully; + dec->pf_mc[1] = &impeg2d_mc_fullx_halfy; + dec->pf_mc[2] = &impeg2d_mc_halfx_fully; + dec->pf_mc[3] = &impeg2d_mc_halfx_halfy; + + dec->pf_interpolate = &impeg2_interpolate_a9q; + dec->pf_copy_mb = &impeg2_copy_mb_a9q; + + dec->pf_fullx_halfy_8x8 = &impeg2_mc_fullx_halfy_8x8_a9q; + dec->pf_halfx_fully_8x8 = &impeg2_mc_halfx_fully_8x8_a9q; + dec->pf_halfx_halfy_8x8 
= &impeg2_mc_halfx_halfy_8x8_a9q; + dec->pf_fullx_fully_8x8 = &impeg2_mc_fullx_fully_8x8_a9q; + + dec->pf_memset_8bit_8x8_block = &impeg2_memset_8bit_8x8_block_a9q; + dec->pf_memset_16bit_8x8_linear_block = &impeg2_memset0_16bit_8x8_linear_block_a9q; + + dec->pf_copy_yuv420p_buf = &impeg2_copy_frm_yuv420p; + dec->pf_fmt_conv_yuv420p_to_yuv422ile = &impeg2_fmt_conv_yuv420p_to_yuv422ile; + dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = &impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q; + dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = &impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q; +} + + diff --git a/decoder/arm/impeg2d_function_selector_av8.c b/decoder/arm/impeg2d_function_selector_av8.c new file mode 100644 index 0000000..d163b54 --- /dev/null +++ b/decoder/arm/impeg2d_function_selector_av8.c @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "ithread.h" + + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_job_queue.h" +#include "impeg2_globals.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_api.h" +#include "impeg2d_debug.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_mc.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" + +void impeg2d_init_function_ptr_av8(void *pv_codec) +{ + dec_state_t *dec = (dec_state_t *)pv_codec; + + dec->pf_idct_recon[0] = &impeg2_idct_recon_dc_av8; + dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch_av8; + dec->pf_idct_recon[2] = &impeg2_idct_recon_av8; + dec->pf_idct_recon[3] = &impeg2_idct_recon_av8; + + dec->pf_mc[0] = &impeg2d_mc_fullx_fully; + dec->pf_mc[1] = &impeg2d_mc_fullx_halfy; + dec->pf_mc[2] = &impeg2d_mc_halfx_fully; + dec->pf_mc[3] = &impeg2d_mc_halfx_halfy; + + dec->pf_interpolate = &impeg2_interpolate_av8; + dec->pf_copy_mb = &impeg2_copy_mb_av8; + + dec->pf_fullx_halfy_8x8 = 
&impeg2_mc_fullx_halfy_8x8_av8; + dec->pf_halfx_fully_8x8 = &impeg2_mc_halfx_fully_8x8_av8; + dec->pf_halfx_halfy_8x8 = &impeg2_mc_halfx_halfy_8x8_av8; + dec->pf_fullx_fully_8x8 = &impeg2_mc_fullx_fully_8x8_av8; + + dec->pf_memset_8bit_8x8_block = &impeg2_memset_8bit_8x8_block_av8; + dec->pf_memset_16bit_8x8_linear_block = &impeg2_memset0_16bit_8x8_linear_block_av8; + + dec->pf_copy_yuv420p_buf = &impeg2_copy_frm_yuv420p; + dec->pf_fmt_conv_yuv420p_to_yuv422ile = &impeg2_fmt_conv_yuv420p_to_yuv422ile; + dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = &impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8; + dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = &impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8; +} diff --git a/decoder/impeg2d.h b/decoder/impeg2d.h new file mode 100644 index 0000000..fe38046 --- /dev/null +++ b/decoder/impeg2d.h @@ -0,0 +1,506 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/*****************************************************************************/
/* Function Macros                                                           */
/*****************************************************************************/
/*
 * IS_IVD_*(x)  : non-zero when bit number IVD_* is set in the error word x.
 * SET_IVD_*(x) : sets bit number IVD_* in the lvalue x and yields the result.
 *
 * The argument is fully parenthesised so that an expression such as
 * IS_IVD_FATAL_ERROR(a | b) tests the whole of (a | b) rather than letting
 * '&' bind to b alone.  SET_IVD_* evaluates its argument exactly once.
 */
#define IS_IVD_CONCEALMENT_APPLIED(x)           ((x) & (1 << IVD_APPLIEDCONCEALMENT))
#define IS_IVD_INSUFFICIENTDATA_ERROR(x)        ((x) & (1 << IVD_INSUFFICIENTDATA))
#define IS_IVD_CORRUPTEDDATA_ERROR(x)           ((x) & (1 << IVD_CORRUPTEDDATA))
#define IS_IVD_CORRUPTEDHEADER_ERROR(x)         ((x) & (1 << IVD_CORRUPTEDHEADER))
#define IS_IVD_UNSUPPORTEDINPUT_ERROR(x)        ((x) & (1 << IVD_UNSUPPORTEDINPUT))
#define IS_IVD_UNSUPPORTEDPARAM_ERROR(x)        ((x) & (1 << IVD_UNSUPPORTEDPARAM))
#define IS_IVD_FATAL_ERROR(x)                   ((x) & (1 << IVD_FATALERROR))
#define IS_IVD_INVALID_BITSTREAM_ERROR(x)       ((x) & (1 << IVD_INVALID_BITSTREAM))
#define IS_IVD_INCOMPLETE_BITSTREAM_ERROR(x)    ((x) & (1 << IVD_INCOMPLETE_BITSTREAM))

#define SET_IVD_CONCEALMENT_APPLIED(x)          ((x) |= (1 << IVD_APPLIEDCONCEALMENT))
#define SET_IVD_INSUFFICIENTDATA_ERROR(x)       ((x) |= (1 << IVD_INSUFFICIENTDATA))
#define SET_IVD_CORRUPTEDDATA_ERROR(x)          ((x) |= (1 << IVD_CORRUPTEDDATA))
#define SET_IVD_CORRUPTEDHEADER_ERROR(x)        ((x) |= (1 << IVD_CORRUPTEDHEADER))
#define SET_IVD_UNSUPPORTEDINPUT_ERROR(x)       ((x) |= (1 << IVD_UNSUPPORTEDINPUT))
#define SET_IVD_UNSUPPORTEDPARAM_ERROR(x)       ((x) |= (1 << IVD_UNSUPPORTEDPARAM))
#define SET_IVD_FATAL_ERROR(x)                  ((x) |= (1 << IVD_FATALERROR))
#define SET_IVD_INVALID_BITSTREAM_ERROR(x)      ((x) |= (1 << IVD_INVALID_BITSTREAM))
#define SET_IVD_INCOMPLETE_BITSTREAM_ERROR(x)   ((x) |= (1 << IVD_INCOMPLETE_BITSTREAM))
//IMPEG2D_SEARCH_START_CODE_FAIL , + /* Decode Video Frame API */ + IMPEG2D_START_CODE_NOT_FOUND, + IMPEG2D_MARKER_BIT_NOT_FOUND, + IMPEG2D_INVALID_STUFFING, + IMPEG2D_PROFILE_LEVEL_NOT_SUP, + IMPEG2D_CHROMA_FMT_NOT_SUP, + IMPEG2D_SCALABLITY_NOT_SUP, + IMPEG2D_FRM_HDR_DECODE_ERR, + IMPEG2D_MB_HDR_DECODE_ERR, + IMPEG2D_MB_TEX_DECODE_ERR, + IMPEG2D_INCORRECT_QUANT_MATRIX, + IMPEG2D_INVALID_SKIP_MB, + IMPEG2D_NOT_SUPPORTED_ERR, + IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR, + IMPEG2D_INVALID_PIC_TYPE, + IMPEG2D_INVALID_HUFFMAN_CODE, + IMPEG2D_NO_FREE_BUF_ERR, + + /* slice header errors */ + IMPEG2D_INVALID_VERT_SIZE, + IMPEG2D_MB_DATA_DECODE_ERR, + + /* Get Display Frame API */ + IMPEG2D_GET_DISP_FRM_FAIL, + + /* Sample Version limitation */ + IMPEG2D_SAMPLE_VERSION_LIMIT_ERR, + /** + * Width/height greater than max width and max height + */ + IMPEG2D_UNSUPPORTED_DIMENSIONS, + + /* Unknown API Command */ + IMPEG2D_UNKNOWN_API_COMMAND + +} IMPEG2D_ERROR_CODES_T; + +/*****************************************************************************/ +/* Extended Structures */ +/*****************************************************************************/ +typedef enum +{ + /** Set number of cores/threads to be used */ + IMPEG2D_CMD_CTL_SET_NUM_CORES = IVD_CMD_CTL_CODEC_SUBCMD_START, + + /** Set processor details */ + IMPEG2D_CMD_CTL_SET_PROCESSOR = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x001, + + /** Get display buffer dimensions */ + IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS = IVD_CMD_CTL_CODEC_SUBCMD_START + + 0x100, + +} IMPEG2D_CMD_CTL_SUB_CMDS; + +/*****************************************************************************/ +/* Get Number of Memory Records */ +/*****************************************************************************/ + +typedef struct +{ + iv_num_mem_rec_ip_t s_ivd_num_mem_rec_ip_t; +} impeg2d_num_mem_rec_ip_t; + +typedef struct +{ + iv_num_mem_rec_op_t s_ivd_num_mem_rec_op_t; +} impeg2d_num_mem_rec_op_t; + 
/*****************************************************************************/
/*  Fill Memory Records                                                      */
/*****************************************************************************/

/*
 * Input to the fill-memory-records call.  The two members after the generic
 * part are optional: impeg2d_fill_mem_rec() reads each one only when
 * s_ivd_fill_mem_rec_ip_t.u4_size is larger than that member's offset, and
 * otherwise falls back to "no sharing" / "no output format given".
 */
typedef struct
{
    iv_fill_mem_rec_ip_t                   s_ivd_fill_mem_rec_ip_t;
    /* Flag to enable sharing of reference buffers between decoder
       and application */

    UWORD32                                u4_share_disp_buf;

    /* format in which codec has to give out frame data for display */
    IV_COLOR_FORMAT_T                      e_output_format;

} impeg2d_fill_mem_rec_ip_t;

/* Output wrapper: carries only the generic iv_fill_mem_rec_op_t */
typedef struct
{
    iv_fill_mem_rec_op_t                   s_ivd_fill_mem_rec_op_t;
} impeg2d_fill_mem_rec_op_t;

/*****************************************************************************/
/*  Retrieve Memory Records                                                  */
/*****************************************************************************/

/* Input wrapper: carries only the generic iv_retrieve_mem_rec_ip_t */
typedef struct
{
    iv_retrieve_mem_rec_ip_t               s_ivd_retrieve_mem_rec_ip_t;
} impeg2d_retrieve_mem_rec_ip_t;

/* Output wrapper: carries only the generic iv_retrieve_mem_rec_op_t */
typedef struct
{
    iv_retrieve_mem_rec_op_t               s_ivd_retrieve_mem_rec_op_t;
} impeg2d_retrieve_mem_rec_op_t;

/*****************************************************************************/
/*  Initialize decoder                                                       */
/*****************************************************************************/

/* Input to decoder initialisation: generic part plus the sharing flag */
typedef struct
{
    ivd_init_ip_t                          s_ivd_init_ip_t;
    /* Flag to enable sharing of reference buffers between decoder
       and application */
    UWORD32                                u4_share_disp_buf;

} impeg2d_init_ip_t;

/* Output wrapper: carries only the generic ivd_init_op_t */
typedef struct
{
    ivd_init_op_t                          s_ivd_init_op_t;
} impeg2d_init_op_t;

/*****************************************************************************/
/*  Video Decode                                                             */
/*****************************************************************************/

/* Input wrapper: carries only the generic ivd_video_decode_ip_t */
typedef struct
{
    ivd_video_decode_ip_t                  s_ivd_video_decode_ip_t;
} impeg2d_video_decode_ip_t;

/* Output wrapper: carries only the generic ivd_video_decode_op_t */
typedef struct
{
    ivd_video_decode_op_t                  s_ivd_video_decode_op_t;
} impeg2d_video_decode_op_t;
/*****************************************************************************/
/*  Get Display Frame                                                        */
/*****************************************************************************/
/* Each type in this group wraps exactly one generic ivd_* structure and     */
/* adds no codec-specific members.                                           */

typedef struct
{
    ivd_get_display_frame_ip_t             s_ivd_get_display_frame_ip_t;
} impeg2d_get_display_frame_ip_t;

typedef struct
{
    ivd_get_display_frame_op_t             s_ivd_get_display_frame_op_t;
} impeg2d_get_display_frame_op_t;

/*****************************************************************************/
/*  Set Display Frame                                                        */
/*****************************************************************************/
typedef struct
{
    ivd_set_display_frame_ip_t             s_ivd_set_display_frame_ip_t;
} impeg2d_set_display_frame_ip_t;

typedef struct
{
    ivd_set_display_frame_op_t             s_ivd_set_display_frame_op_t;
} impeg2d_set_display_frame_op_t;

/*****************************************************************************/
/*  Release Display Buffers                                                  */
/*****************************************************************************/

typedef struct
{
    ivd_rel_display_frame_ip_t             s_ivd_rel_display_frame_ip_t;
} impeg2d_rel_display_frame_ip_t;

typedef struct
{
    ivd_rel_display_frame_op_t             s_ivd_rel_display_frame_op_t;
} impeg2d_rel_display_frame_op_t;

/*****************************************************************************/
/*  Video control  Flush                                                     */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_flush_ip_t                     s_ivd_ctl_flush_ip_t;
} impeg2d_ctl_flush_ip_t;

typedef struct
{
    ivd_ctl_flush_op_t                     s_ivd_ctl_flush_op_t;
} impeg2d_ctl_flush_op_t;

/*****************************************************************************/
/*  Video control reset                                                      */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_reset_ip_t                     s_ivd_ctl_reset_ip_t;
} impeg2d_ctl_reset_ip_t;

typedef struct
{
    ivd_ctl_reset_op_t                     s_ivd_ctl_reset_op_t;
} impeg2d_ctl_reset_op_t;
/*****************************************************************************/
/*  Video control  Set Params                                                */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_set_config_ip_t             s_ivd_ctl_set_config_ip_t;
} impeg2d_ctl_set_config_ip_t;

typedef struct
{
    ivd_ctl_set_config_op_t             s_ivd_ctl_set_config_op_t;
} impeg2d_ctl_set_config_op_t;

/*****************************************************************************/
/*  Video control:Get Buf Info                                               */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_getbufinfo_ip_t             s_ivd_ctl_getbufinfo_ip_t;
} impeg2d_ctl_getbufinfo_ip_t;

typedef struct
{
    ivd_ctl_getbufinfo_op_t             s_ivd_ctl_getbufinfo_op_t;
} impeg2d_ctl_getbufinfo_op_t;

/*****************************************************************************/
/*  Video control:Getstatus Call                                             */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_getstatus_ip_t              s_ivd_ctl_getstatus_ip_t;
} impeg2d_ctl_getstatus_ip_t;

typedef struct
{
    ivd_ctl_getstatus_op_t              s_ivd_ctl_getstatus_op_t;
} impeg2d_ctl_getstatus_op_t;

/*****************************************************************************/
/*  Video control:Get Version Info                                           */
/*****************************************************************************/

typedef struct
{
    ivd_ctl_getversioninfo_ip_t         s_ivd_ctl_getversioninfo_ip_t;
} impeg2d_ctl_getversioninfo_ip_t;

typedef struct
{
    ivd_ctl_getversioninfo_op_t         s_ivd_ctl_getversioninfo_op_t;
} impeg2d_ctl_getversioninfo_op_t;

/*****************************************************************************/
/*  Video control:Set Number of Cores                                        */
/*****************************************************************************/

/* Input of the set-number-of-cores control call */
typedef struct
{
    UWORD32                             u4_size;
    IVD_API_COMMAND_TYPE_T              e_cmd;
    IVD_CONTROL_API_COMMAND_TYPE_T      e_sub_cmd;
    /* Requested core count; impeg2d_api_set_num_cores() treats 0 specially */
    UWORD32                             u4_num_cores;
} impeg2d_ctl_set_num_cores_ip_t;

/* Output of the set-number-of-cores control call */
typedef struct
{
    UWORD32                             u4_size;
    UWORD32                             u4_error_code;
} impeg2d_ctl_set_num_cores_op_t;

/*****************************************************************************/
/*  Video control:Set Processor                                              */
/*****************************************************************************/

/* Input of the set-processor control call */
typedef struct
{
    /**
     * size
     */
    UWORD32                             u4_size;
    /**
     * cmd
     */
    IVD_API_COMMAND_TYPE_T              e_cmd;
    /**
     * sub cmd
     */
    IVD_CONTROL_API_COMMAND_TYPE_T      e_sub_cmd;
    /**
     * Processor type; impeg2d_set_processor() casts this to IVD_ARCH_T
     */
    UWORD32                             u4_arch;
    /**
     * SOC type; impeg2d_set_processor() casts this to IVD_SOC_T
     */
    UWORD32                             u4_soc;

    /**
     * num_cores (not read by impeg2d_set_processor())
     */
    UWORD32                             u4_num_cores;

} impeg2d_ctl_set_processor_ip_t;

/* Output of the set-processor control call */
typedef struct
{
    /**
     * size
     */
    UWORD32                             u4_size;
    /**
     * error_code
     */
    UWORD32                             u4_error_code;
} impeg2d_ctl_set_processor_op_t;

/*****************************************************************************/
/*  Video control:Get Frame Dimensions                                       */
/*****************************************************************************/
/* NOTE(review): presumably paired with IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS */
/* - confirm against the control dispatcher.                                 */

typedef struct
{

    /**
     * size
     */
    UWORD32                             u4_size;

    /**
     * cmd
     */
    IVD_API_COMMAND_TYPE_T              e_cmd;

    /**
     * sub cmd
     */
    IVD_CONTROL_API_COMMAND_TYPE_T      e_sub_cmd;
} impeg2d_ctl_get_frame_dimensions_ip_t;

/* NOTE(review): the three-element arrays below look like one entry per      */
/* colour plane - confirm against the code that fills them.                  */
typedef struct
{

    /**
     * size
     */
    UWORD32                             u4_size;

    /**
     * error_code
     */
    UWORD32                             u4_error_code;

    /**
     * x_offset[3]
     */
    UWORD32                             u4_x_offset[3];

    /**
     * y_offset[3]
     */
    UWORD32                             u4_y_offset[3];

    /**
     * disp_wd[3]
     */
    UWORD32                             u4_disp_wd[3];

    /**
     * disp_ht[3]
     */
    UWORD32                             u4_disp_ht[3];

    /**
     * buffer_wd[3]
     */
    UWORD32                             u4_buffer_wd[3];

    /**
     * buffer_ht[3]
     */
    UWORD32                             u4_buffer_ht[3];
} impeg2d_ctl_get_frame_dimensions_op_t;
/*****************************************************************************/
/* Constant Macros                                                           */
/*****************************************************************************/

/* Debug output goes straight to printf */
#define DEBUG_PRINT printf

/*
 * Number of memory records: four per thread, one per internal frame buffer,
 * plus five.  The whole sum is parenthesised so that the macro keeps its
 * value when embedded in a larger expression (e.g. 2 * NUM_MEM_RECORDS).
 */
#define NUM_MEM_RECORDS     ((4 * MAX_THREADS) + NUM_INT_FRAME_BUFFERS + 5)

/*
 * Sets bit number i in the lvalue a.  The bit index is parenthesised so
 * that an expression argument is evaluated as a whole before the shift.
 */
#define SETBIT(a, i)        ((a) |= (1 << (i)))
+IV_API_CALL_STATUS_T impeg2d_api_fill_mem_rec(void *pv_api_ip, void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *DECHDL, + void *ps_ip, + void *ps_op); + +IV_API_CALL_STATUS_T impeg2d_api_set_display_frame(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_rel_display_frame(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_retrieve_mem_rec(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_ctl(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_get_version(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_get_buf_info(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_set_flush_mode(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_set_default(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_set_params(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_get_status(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_set_params(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, + impeg2d_fill_mem_rec_op_t *ps_op); + +void impeg2d_dec_frm(void *dec, + impeg2d_video_decode_ip_t *ps_ip, + impeg2d_video_decode_op_t *ps_op); + +void impeg2d_dec_hdr(void *dec, + impeg2d_video_decode_ip_t *ps_ip, + impeg2d_video_decode_op_t *ps_op); + +IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *DECHDL, + void *pv_api_ip, + void *pv_api_op); + +IV_API_CALL_STATUS_T impeg2d_api_check_struct_sanity(iv_obj_t *ps_handle, + void *pv_api_ip, + void *pv_api_op); + + + + +#endif /* 
__IMPEG2D_API_H__ */ + diff --git a/decoder/impeg2d_api_main.c b/decoder/impeg2d_api_main.c new file mode 100755 index 0000000..451eb93 --- /dev/null +++ b/decoder/impeg2d_api_main.c @@ -0,0 +1,3258 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : decoder_api_main.c */ +/* */ +/* Description : Functions which recieve the API call from user */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 30 05 2007 Rajneesh Creation */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_job_queue.h" +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_api.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" +#include "impeg2d_mc.h" +#include "impeg2d_pic_proc.h" + +#define NUM_FRAMES_LIMIT_ENABLED 0 + +#ifdef LOGO_EN +#include "impeg2_ittiam_logo.h" +#define INSERT_LOGO(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht) impeg2_insert_logo(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht); +#else +#define INSERT_LOGO(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht) +#endif + +#if NUM_FRAMES_LIMIT_ENABLED +#define NUM_FRAMES_LIMIT 10000 +#else +#define NUM_FRAMES_LIMIT 0x7FFFFFFF +#endif + +#define CODEC_NAME "MPEG2VDEC" 
+#define CODEC_RELEASE_TYPE "eval" +#define CODEC_RELEASE_VER "01.00" +#define CODEC_VENDOR "ITTIAM" + +#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \ + strcpy(version_string,"@(#)Id:"); \ + strcat(version_string,codec_name); \ + strcat(version_string,"_"); \ + strcat(version_string,codec_release_type); \ + strcat(version_string," Ver:"); \ + strcat(version_string,codec_release_ver); \ + strcat(version_string," Released by "); \ + strcat(version_string,codec_vendor); \ + strcat(version_string," Build: "); \ + strcat(version_string,__DATE__); \ + strcat(version_string," @ "); \ + strcat(version_string,__TIME__); + + +#define MIN_OUT_BUFS_420 3 +#define MIN_OUT_BUFS_422ILE 1 +#define MIN_OUT_BUFS_RGB565 1 +#define MIN_OUT_BUFS_420SP 2 + + +void impeg2d_init_arch(void *pv_codec); +void impeg2d_init_function_ptr(void *pv_codec); + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_rel_display_frame */ +/* */ +/* Description : Release displ buffers that will be shared between decoder */ +/* and application */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Just prints error message to console */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 05 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_rel_display_frame(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + + ivd_rel_display_frame_ip_t *dec_rel_disp_ip; + ivd_rel_display_frame_op_t *dec_rel_disp_op; + + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + + + dec_rel_disp_ip = (ivd_rel_display_frame_ip_t *)pv_api_ip; + dec_rel_disp_op = 
(ivd_rel_display_frame_op_t *)pv_api_op; + + dec_rel_disp_op->u4_error_code = 0; + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + + /* If not in shared disp buf mode, return */ + if(0 == ps_dec_state->u4_share_disp_buf) + return IV_SUCCESS; + + if(NULL == ps_dec_state->pv_pic_buf_mg) + return IV_SUCCESS; + + + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, dec_rel_disp_ip->u4_disp_buf_id, BUF_MGR_DISP); + + return IV_SUCCESS; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_set_display_frame */ +/* */ +/* Description : Sets display buffers that will be shared between decoder */ +/* and application */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Just prints error message to console */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 27 05 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_set_display_frame(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + + ivd_set_display_frame_ip_t *dec_disp_ip; + ivd_set_display_frame_op_t *dec_disp_op; + + UWORD32 i; + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + UWORD32 u4_num_disp_bufs; + + + dec_disp_ip = (ivd_set_display_frame_ip_t *)pv_api_ip; + dec_disp_op = (ivd_set_display_frame_op_t *)pv_api_op; + dec_disp_op->u4_error_code = 0; + + u4_num_disp_bufs = dec_disp_ip->num_disp_bufs; + if(u4_num_disp_bufs > BUF_MGR_MAX_CNT) + u4_num_disp_bufs = BUF_MGR_MAX_CNT; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = 
ps_dec_state_multi_core->ps_dec_state[0]; + + if(ps_dec_state->u4_share_disp_buf) + { + pic_buf_t *ps_pic_buf; + ps_pic_buf = (pic_buf_t *)ps_dec_state->pv_pic_buf_base; + for(i = 0; i < u4_num_disp_bufs; i++) + { + + ps_pic_buf->pu1_y = dec_disp_ip->s_disp_buffer[i].pu1_bufs[0]; + if(IV_YUV_420P == ps_dec_state->i4_chromaFormat) + { + ps_pic_buf->pu1_u = dec_disp_ip->s_disp_buffer[i].pu1_bufs[1]; + ps_pic_buf->pu1_v = dec_disp_ip->s_disp_buffer[i].pu1_bufs[2]; + } + else + { + ps_pic_buf->pu1_u = ps_dec_state->pu1_chroma_ref_buf[i]; + ps_pic_buf->pu1_v = ps_dec_state->pu1_chroma_ref_buf[i] + + ((ps_dec_state->u2_create_max_width * ps_dec_state->u2_create_max_height) >> 2); + } + + ps_pic_buf->i4_buf_id = i; + + ps_pic_buf->u1_used_as_ref = 0; + + ps_pic_buf->u4_ts = 0; + + impeg2_buf_mgr_add(ps_dec_state->pv_pic_buf_mg, ps_pic_buf, i); + impeg2_buf_mgr_set_status(ps_dec_state->pv_pic_buf_mg, i, BUF_MGR_DISP); + ps_pic_buf++; + + } + } + memcpy(&(ps_dec_state->as_disp_buffers[0]), + &(dec_disp_ip->s_disp_buffer), + u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t)); + + return IV_SUCCESS; + +} + +IV_API_CALL_STATUS_T impeg2d_api_set_num_cores(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + impeg2d_ctl_set_num_cores_ip_t *ps_ip; + impeg2d_ctl_set_num_cores_op_t *ps_op; + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + + ps_ip = (impeg2d_ctl_set_num_cores_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_set_num_cores_op_t *)pv_api_op; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + if(ps_ip->u4_num_cores > 0) + { + + + WORD32 i; + for(i = 0; i < MAX_THREADS; i++) + ps_dec_state_multi_core->ps_dec_state[i]->i4_num_cores = ps_ip->u4_num_cores; + } + else + { + ps_dec_state->i4_num_cores = 1; + } + ps_op->u4_error_code = IV_SUCCESS; + + return IV_SUCCESS; +} + +/** 
+******************************************************************************* +* +* @brief +* Sets Processor type +* +* @par Description: +* Sets Processor type +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns Status +* +* @remarks +* +* +******************************************************************************* +*/ + +IV_API_CALL_STATUS_T impeg2d_set_processor(iv_obj_t *ps_codec_obj, + void *pv_api_ip, + void *pv_api_op) +{ + impeg2d_ctl_set_processor_ip_t *ps_ip; + impeg2d_ctl_set_processor_op_t *ps_op; + dec_state_t *ps_codec; + dec_state_multi_core_t *ps_dec_state_multi_core; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_codec_obj->pv_codec_handle); + ps_codec = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_ip = (impeg2d_ctl_set_processor_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_set_processor_op_t *)pv_api_op; + + ps_codec->e_processor_arch = (IVD_ARCH_T)ps_ip->u4_arch; + ps_codec->e_processor_soc = (IVD_SOC_T)ps_ip->u4_soc; + + impeg2d_init_function_ptr(ps_codec); + + + ps_op->u4_error_code = 0; + return IV_SUCCESS; +} +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_fill_mem_rec */ +/* */ +/* Description : */ +/* Inputs : */ +/* Globals : */ +/* Processing : */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 09 2007 Rajendra C Y Draft */ +/* */ +/*****************************************************************************/ +void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, + impeg2d_fill_mem_rec_op_t *ps_op) +{ + UWORD32 u4_i; + + UWORD8 u1_no_rec = 0; + UWORD32 max_frm_width,max_frm_height,max_frm_size; + iv_mem_rec_t *ps_mem_rec = 
ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location; + WORD32 i4_num_threads; + WORD32 i4_share_disp_buf, i4_chroma_format; + WORD32 i4_chroma_size; + + max_frm_width = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd); + max_frm_height = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht); + + max_frm_size = (max_frm_width * max_frm_height * 3) >> 1;/* 420 P */ + + i4_chroma_size = max_frm_width * max_frm_height / 4; + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size > offsetof(impeg2d_fill_mem_rec_ip_t, u4_share_disp_buf)) + { +#ifndef LOGO_EN + i4_share_disp_buf = ps_ip->u4_share_disp_buf; +#else + i4_share_disp_buf = 0; +#endif + } + else + { + i4_share_disp_buf = 0; + } + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size > offsetof(impeg2d_fill_mem_rec_ip_t, e_output_format)) + { + i4_chroma_format = ps_ip->e_output_format; + } + else + { + i4_chroma_format = -1; + } + + + if( (i4_chroma_format != IV_YUV_420P) && + (i4_chroma_format != IV_YUV_420SP_UV) && + (i4_chroma_format != IV_YUV_420SP_VU)) + { + i4_share_disp_buf = 0; + } + + /*************************************************************************/ + /* Fill the memory requirement XDM Handle */ + /*************************************************************************/ + /* ! */ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = sizeof(iv_obj_t); + + ps_mem_rec++; + u1_no_rec++; + + { + /*************************************************************************/ + /* Fill the memory requirement for threads context */ + /*************************************************************************/ + /* ! 
*/ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = sizeof(dec_state_multi_core_t); + + ps_mem_rec++; + u1_no_rec++; + } + + for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++) + { + /*************************************************************************/ + /* Fill the memory requirement for MPEG2 Decoder Context */ + /*************************************************************************/ + /* ! */ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = sizeof(dec_state_t); + + ps_mem_rec++; + u1_no_rec++; + + /* To store thread handle */ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = ithread_get_handle_size(); + + ps_mem_rec++; + u1_no_rec++; + + /*************************************************************************/ + /* Fill the memory requirement for Motion Compensation Buffers */ + /*************************************************************************/ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_SCRATCH_MEM; + + /* for mc_fw_buf.pu1_y */ + ps_mem_rec->u4_mem_size = MB_LUMA_MEM_SIZE; + + /* for mc_fw_buf.pu1_u */ + ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + /* for mc_fw_buf.pu1_v */ + ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + /* for mc_bk_buf.pu1_y */ + ps_mem_rec->u4_mem_size += MB_LUMA_MEM_SIZE; + + /* for mc_bk_buf.pu1_u */ + ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + /* for mc_bk_buf.pu1_v */ + ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + /* for mc_buf.pu1_y */ + ps_mem_rec->u4_mem_size += MB_LUMA_MEM_SIZE; + + /* for mc_buf.pu1_u */ + ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + /* for mc_buf.pu1_v */ + 
ps_mem_rec->u4_mem_size += MB_CHROMA_MEM_SIZE; + + ps_mem_rec++; + u1_no_rec++; + + + /*************************************************************************/ + /* Fill the memory requirement Stack Context */ + /*************************************************************************/ + /* ! */ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = 392; + + ps_mem_rec++; + u1_no_rec++; + } + + + + { + /*************************************************************************/ + /* Fill the memory requirement for Picture Buffer Manager */ + /*************************************************************************/ + /* ! */ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = sizeof(buf_mgr_t) + sizeof(pic_buf_t) * BUF_MGR_MAX_CNT; + + ps_mem_rec++; + u1_no_rec++; + } + /*************************************************************************/ + /* Internal Frame Buffers */ + /*************************************************************************/ +/* ! */ + + { + for(u4_i = 0; u4_i < NUM_INT_FRAME_BUFFERS; u4_i++) + { + /* ! 
*/ + ps_mem_rec->u4_mem_alignment = 128 /* 128 byte alignment*/; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + if(0 == i4_share_disp_buf) + ps_mem_rec->u4_mem_size = max_frm_size; + else if(IV_YUV_420P != i4_chroma_format) + { + /* If color format is not 420P and it is shared, then allocate for chroma */ + ps_mem_rec->u4_mem_size = i4_chroma_size * 2; + } + else + ps_mem_rec->u4_mem_size = 64; + ps_mem_rec++; + u1_no_rec++; + } + } + + + + { + WORD32 i4_job_queue_size; + WORD32 i4_num_jobs; + + /* One job per row of MBs */ + i4_num_jobs = max_frm_height >> 4; + + /* One format convert/frame copy job per row of MBs for non-shared mode*/ + i4_num_jobs += max_frm_height >> 4; + + + i4_job_queue_size = impeg2_jobq_ctxt_size(); + i4_job_queue_size += i4_num_jobs * sizeof(job_t); + ps_mem_rec->u4_mem_size = i4_job_queue_size; + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + + ps_mem_rec++; + u1_no_rec++; + + } + + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + ps_mem_rec->u4_mem_size = sizeof(iv_mem_rec_t) * (NUM_MEM_RECORDS); + ps_mem_rec++; + u1_no_rec++; + ps_op->s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled = u1_no_rec; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0; +} + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_get_version */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_get_version(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + char au1_version_string[512]; + + 
impeg2d_ctl_getversioninfo_ip_t *ps_ip; + impeg2d_ctl_getversioninfo_op_t *ps_op; + + UNUSED(ps_dechdl); + + ps_ip = (impeg2d_ctl_getversioninfo_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_getversioninfo_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_SUCCESS; + + VERSION(au1_version_string, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER, + CODEC_VENDOR); + + if((WORD32)ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_version_buffer_size <= 0) + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_FAIL; + return (IV_FAIL); + } + + if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_version_buffer_size + >= (strlen(au1_version_string) + 1)) + { + memcpy(ps_ip->s_ivd_ctl_getversioninfo_ip_t.pv_version_buffer, + au1_version_string, (strlen(au1_version_string) + 1)); + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_SUCCESS; + } + else + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_FAIL; + } + + return (IV_SUCCESS); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_get_buf_info */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_get_buf_info(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + impeg2d_ctl_getbufinfo_ip_t *ps_ctl_bufinfo_ip = + (impeg2d_ctl_getbufinfo_ip_t *)pv_api_ip; + impeg2d_ctl_getbufinfo_op_t *ps_ctl_bufinfo_op = + (impeg2d_ctl_getbufinfo_op_t *)pv_api_op; + UWORD32 u4_i, u4_stride, u4_height; + UNUSED(ps_ctl_bufinfo_ip); + + ps_dec_state_multi_core = + 
(dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_in_bufs = 1; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = 1; + + if(ps_dec_state->i4_chromaFormat == IV_YUV_420P) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = + MIN_OUT_BUFS_420; + } + else if((ps_dec_state->i4_chromaFormat == IV_YUV_420SP_UV) + || (ps_dec_state->i4_chromaFormat == IV_YUV_420SP_VU)) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = + MIN_OUT_BUFS_420SP; + } + else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = + MIN_OUT_BUFS_422ILE; + } + else if(ps_dec_state->i4_chromaFormat == IV_RGB_565) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = + MIN_OUT_BUFS_RGB565; + } + else + { + //Invalid chroma format; Error code may be updated, verify in testing if needed + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code = + IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + return IV_FAIL; + } + + for(u4_i = 0; u4_i < IVD_VIDDEC_MAX_IO_BUFFERS; u4_i++) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_in_buf_size[u4_i] = + 0; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[u4_i] = + 0; + } + + for(u4_i = 0; + u4_i < ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_in_bufs; + u4_i++) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_in_buf_size[u4_i] = + MAX_BITSTREAM_BUFFER_SIZE; + } + + if (0 == ps_dec_state->u4_frm_buf_stride) + { + if (1 == ps_dec_state->u2_header_done) + { + u4_stride = ps_dec_state->u2_horizontal_size; + } + else + { + u4_stride = ps_dec_state->u2_create_max_width; + } + } + else + { + u4_stride = ps_dec_state->u4_frm_buf_stride; + } + u4_height = ((ps_dec_state->u2_frame_height + 15) >> 4) << 4; + + if(ps_dec_state->i4_chromaFormat == IV_YUV_420P) + { + 
ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] = + (u4_stride * u4_height); + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] = + (u4_stride * u4_height) >> 2; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] = + (u4_stride * u4_height) >> 2; + } + else if((ps_dec_state->i4_chromaFormat == IV_YUV_420SP_UV) + || (ps_dec_state->i4_chromaFormat == IV_YUV_420SP_VU)) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] = + (u4_stride * u4_height); + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] = + (u4_stride * u4_height) >> 1; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] = 0; + } + else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE) + { + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] = + (u4_stride * u4_height) * 2; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] = + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] = + 0; + } + + /* Adding initialization for 2 uninitialized values */ + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_num_disp_bufs = 1; + if(ps_dec_state->u4_share_disp_buf) + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_num_disp_bufs = + NUM_INT_FRAME_BUFFERS; + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_size = MAX_FRM_SIZE; + + ps_ctl_bufinfo_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code = IV_SUCCESS; + + return (IV_SUCCESS); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_set_flush_mode */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 06 2009 100356 RAVI */ +/* */ +/*****************************************************************************/ 
+IV_API_CALL_STATUS_T impeg2d_api_set_flush_mode(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + impeg2d_ctl_flush_op_t *ps_ctl_dec_op = + (impeg2d_ctl_flush_op_t*)pv_api_op; + + UNUSED(pv_api_ip); + + ps_dec_state_multi_core = + (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_dec_state->u1_flushfrm = 1; + + ps_ctl_dec_op->s_ivd_ctl_flush_op_t.u4_size = + sizeof(impeg2d_ctl_flush_op_t); + ps_ctl_dec_op->s_ivd_ctl_flush_op_t.u4_error_code = IV_SUCCESS; + + return (IV_SUCCESS); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_set_default */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 06 2009 100356 RAVI */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_set_default(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + impeg2d_ctl_set_config_op_t *ps_ctl_dec_op = + (impeg2d_ctl_set_config_op_t *)pv_api_op; + + UNUSED(pv_api_ip); + + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_SUCCESS; + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_size = + sizeof(impeg2d_ctl_set_config_op_t); + + ps_dec_state_multi_core = + (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_dec_state->u1_flushfrm = 0; + ps_dec_state->u2_decode_header = 1; + + if (1 == ps_dec_state->u2_header_done) + { + ps_dec_state->u4_frm_buf_stride = ps_dec_state->u2_frame_width; + } + + 
ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_SUCCESS; + + return (IV_SUCCESS); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_reset */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 06 2009 100356 RAVI */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + UNUSED(pv_api_ip); + impeg2d_ctl_reset_op_t *s_ctl_reset_op = (impeg2d_ctl_reset_op_t *)pv_api_op; + + WORD32 i4_num_threads; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + if(ps_dec_state_multi_core != NULL) + { + for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++) + { + + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[i4_num_threads]; + + + /* --------------------------------------------------------------------- */ + /* Initializations */ + + ps_dec_state->u2_header_done = 0; /* Header decoding not done */ + ps_dec_state->u4_frm_buf_stride = 0; + ps_dec_state->u2_is_mpeg2 = 0; + } + } + else + { + s_ctl_reset_op->s_ivd_ctl_reset_op_t.u4_error_code = + IMPEG2D_INIT_NOT_DONE; + } + + return(IV_SUCCESS); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_set_params */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ 
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 06 2009 100356 RAVI */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_set_params(iv_obj_t *ps_dechdl,void *pv_api_ip,void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + impeg2d_ctl_set_config_ip_t *ps_ctl_dec_ip = (impeg2d_ctl_set_config_ip_t *)pv_api_ip; + impeg2d_ctl_set_config_op_t *ps_ctl_dec_op = (impeg2d_ctl_set_config_op_t *)pv_api_op; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + if((ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode != IVD_DECODE_HEADER) && (ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode != IVD_DECODE_FRAME)) + { + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL; + return(IV_FAIL); + } + + if((ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_frm_out_mode != IVD_DISPLAY_FRAME_OUT) && (ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_frm_out_mode != IVD_DECODE_FRAME_OUT)) + { + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL; + return(IV_FAIL); + } + + if( (WORD32) ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_frm_skip_mode < IVD_SKIP_NONE) + { + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL; + return(IV_FAIL); + } + + if(ps_dec_state->u2_header_done == 1) + { + if(((WORD32)ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd < 0) || + ((ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd != 0) && (ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd < ps_dec_state->u2_frame_width))) + { + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL; + return(IV_FAIL); + } + + } + else + { + if(((WORD32)ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd < 0) || + ((ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd != 0) && (ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd < 
ps_dec_state->u2_horizontal_size))) + { + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL; + return(IV_FAIL); + } + } + + + ps_dec_state->u2_decode_header = (UWORD8)ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode; + + if(ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd != 0) + { + if(ps_dec_state->u2_header_done == 1) + { + if (ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd > ps_dec_state->u2_frame_width) + { + ps_dec_state->u4_frm_buf_stride = ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd; + } + } + else + { + ps_dec_state->u4_frm_buf_stride = ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd; + } + + } + else + { + + if(ps_dec_state->u2_header_done == 1) + { + ps_dec_state->u4_frm_buf_stride = ps_dec_state->u2_frame_width; + } + else + { + ps_dec_state->u4_frm_buf_stride = 0; + } + } + + + if(ps_ctl_dec_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode == IVD_DECODE_FRAME) + { + ps_dec_state->u1_flushfrm = 0; + } + + + ps_ctl_dec_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_SUCCESS; + return(IV_SUCCESS); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_get_status */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 08 06 2009 100356 RAVI */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_get_status(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + UWORD32 u4_i,u4_stride,u4_height; + impeg2d_ctl_getstatus_ip_t *ps_ctl_dec_ip = (impeg2d_ctl_getstatus_ip_t *)pv_api_ip; + impeg2d_ctl_getstatus_op_t *ps_ctl_dec_op = (impeg2d_ctl_getstatus_op_t *)pv_api_op; + 
UNUSED(ps_ctl_dec_ip); + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_size = sizeof(impeg2d_ctl_getstatus_op_t); + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_num_disp_bufs = 1; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_pic_ht = ps_dec_state->u2_frame_height; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_pic_wd = ps_dec_state->u2_frame_width; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_frame_rate = ps_dec_state->u2_framePeriod; + + + if(ps_dec_state->u2_progressive_sequence == 1) + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_content_type = IV_PROGRESSIVE ; + else + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_content_type = IV_INTERLACED; + + + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_output_chroma_format = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_in_bufs = 1; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = 1; + + + if(ps_dec_state->i4_chromaFormat == IV_YUV_420P) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_420; + } + else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE; + } + else if(ps_dec_state->i4_chromaFormat == IV_RGB_565) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565; + } + else + { + //Invalid chroma format; Error code may be updated, verify in testing if needed + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_error_code = IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + return IV_FAIL; + } + + memset(&ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_in_buf_size[0],0,(sizeof(UWORD32)*IVD_VIDDEC_MAX_IO_BUFFERS)); + memset(&ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0],0,(sizeof(UWORD32)*IVD_VIDDEC_MAX_IO_BUFFERS)); + + for(u4_i = 0; u4_i < 
ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_in_bufs; u4_i++) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_in_buf_size[u4_i] = MAX_BITSTREAM_BUFFER_SIZE; + } + + u4_stride = ps_dec_state->u4_frm_buf_stride; + u4_height = ((ps_dec_state->u2_frame_height + 15) >> 4) << 4; + + if(ps_dec_state->i4_chromaFormat == IV_YUV_420P) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height); + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = (u4_stride * u4_height)>>2 ; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = (u4_stride * u4_height)>>2; + } + else if((ps_dec_state->i4_chromaFormat == IV_YUV_420SP_UV) || (ps_dec_state->i4_chromaFormat == IV_YUV_420SP_VU)) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height); + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = (u4_stride * u4_height)>>1 ; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = 0; + } + else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE) + { + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height)*2; + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = 0; + } + + ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_error_code = IV_SUCCESS; + + return(IV_SUCCESS); + +} + +/** +******************************************************************************* +* +* @brief +* Gets frame dimensions/offsets +* +* @par Description: +* Gets frame buffer chararacteristics such a x & y offsets display and +* buffer dimensions +* +* @param[in] ps_codec_obj +* Pointer to codec object at API level +* +* @param[in] pv_api_ip +* Pointer to input argument structure +* +* @param[out] pv_api_op +* Pointer to output argument structure +* +* @returns Status +* +* @remarks +* +* +******************************************************************************* +*/ 
+IV_API_CALL_STATUS_T impeg2d_get_frame_dimensions(iv_obj_t *ps_codec_obj, + void *pv_api_ip, + void *pv_api_op) +{ + impeg2d_ctl_get_frame_dimensions_ip_t *ps_ip; + impeg2d_ctl_get_frame_dimensions_op_t *ps_op; + WORD32 disp_wd, disp_ht, buffer_wd, buffer_ht, x_offset, y_offset; + dec_state_t *ps_codec; + dec_state_multi_core_t *ps_dec_state_multi_core; + + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_codec_obj->pv_codec_handle); + ps_codec = ps_dec_state_multi_core->ps_dec_state[0]; + + + ps_ip = (impeg2d_ctl_get_frame_dimensions_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_get_frame_dimensions_op_t *)pv_api_op; + UNUSED(ps_ip); + if(ps_codec->u2_header_done) + { + disp_wd = ps_codec->u2_horizontal_size; + disp_ht = ps_codec->u2_vertical_size; + + if(0 == ps_codec->u4_share_disp_buf) + { + buffer_wd = disp_wd; + buffer_ht = disp_ht; + } + else + { + buffer_wd = ps_codec->u2_frame_width; + buffer_ht = ps_codec->u2_frame_height; + } + } + else + { + + disp_wd = ps_codec->u2_create_max_width; + disp_ht = ps_codec->u2_create_max_height; + + if(0 == ps_codec->u4_share_disp_buf) + { + buffer_wd = disp_wd; + buffer_ht = disp_ht; + } + else + { + buffer_wd = ALIGN16(disp_wd); + buffer_ht = ALIGN16(disp_ht); + + } + } + if(ps_codec->u2_frame_width > buffer_wd) + buffer_wd = ps_codec->u2_frame_width; + + x_offset = 0; + y_offset = 0; + + + ps_op->u4_disp_wd[0] = disp_wd; + ps_op->u4_disp_ht[0] = disp_ht; + ps_op->u4_buffer_wd[0] = buffer_wd; + ps_op->u4_buffer_ht[0] = buffer_ht; + ps_op->u4_x_offset[0] = x_offset; + ps_op->u4_y_offset[0] = y_offset; + + ps_op->u4_disp_wd[1] = ps_op->u4_disp_wd[2] = ((ps_op->u4_disp_wd[0] + 1) + >> 1); + ps_op->u4_disp_ht[1] = ps_op->u4_disp_ht[2] = ((ps_op->u4_disp_ht[0] + 1) + >> 1); + ps_op->u4_buffer_wd[1] = ps_op->u4_buffer_wd[2] = (ps_op->u4_buffer_wd[0] + >> 1); + ps_op->u4_buffer_ht[1] = ps_op->u4_buffer_ht[2] = (ps_op->u4_buffer_ht[0] + >> 1); + ps_op->u4_x_offset[1] = ps_op->u4_x_offset[2] = (ps_op->u4_x_offset[0] + >> 1); 
+ ps_op->u4_y_offset[1] = ps_op->u4_y_offset[2] = (ps_op->u4_y_offset[0] + >> 1); + + if((ps_codec->i4_chromaFormat == IV_YUV_420SP_UV) + || (ps_codec->i4_chromaFormat == IV_YUV_420SP_VU)) + { + ps_op->u4_disp_wd[2] = 0; + ps_op->u4_disp_ht[2] = 0; + ps_op->u4_buffer_wd[2] = 0; + ps_op->u4_buffer_ht[2] = 0; + ps_op->u4_x_offset[2] = 0; + ps_op->u4_y_offset[2] = 0; + + ps_op->u4_disp_wd[1] <<= 1; + ps_op->u4_buffer_wd[1] <<= 1; + ps_op->u4_x_offset[1] <<= 1; + } + + return IV_SUCCESS; + +} + +IV_API_CALL_STATUS_T impeg2d_api_function (iv_obj_t *ps_dechdl, void *pv_api_ip,void *pv_api_op) +{ + WORD32 i4_cmd; + IV_API_CALL_STATUS_T u4_error_code; + UWORD32 *pu4_api_ip; + + u4_error_code = impeg2d_api_check_struct_sanity(ps_dechdl,pv_api_ip,pv_api_op); + if(IV_SUCCESS != u4_error_code) + { + return u4_error_code; + } + + + pu4_api_ip = (UWORD32 *)pv_api_ip; + i4_cmd = *(pu4_api_ip + 1); + + switch(i4_cmd) + { + + case IV_CMD_GET_NUM_MEM_REC: + u4_error_code = impeg2d_api_num_mem_rec((void *)pv_api_ip,(void *)pv_api_op); + break; + + case IV_CMD_FILL_NUM_MEM_REC: + u4_error_code = impeg2d_api_fill_mem_rec((void *)pv_api_ip,(void *)pv_api_op); + break; + + case IV_CMD_INIT: + u4_error_code = impeg2d_api_init(ps_dechdl,(void *)pv_api_ip,(void *)pv_api_op); + break; + + case IVD_CMD_SET_DISPLAY_FRAME: + u4_error_code = impeg2d_api_set_display_frame(ps_dechdl,(void *)pv_api_ip,(void *)pv_api_op); + break; + + case IVD_CMD_REL_DISPLAY_FRAME: + u4_error_code = impeg2d_api_rel_display_frame(ps_dechdl,(void *)pv_api_ip,(void *)pv_api_op); + break; + + case IVD_CMD_VIDEO_DECODE: + u4_error_code = impeg2d_api_entity(ps_dechdl, (void *)pv_api_ip,(void *)pv_api_op); + break; + + case IV_CMD_RETRIEVE_MEMREC: + u4_error_code = impeg2d_api_retrieve_mem_rec(ps_dechdl,(void *)pv_api_ip,(void *)pv_api_op); + break; + + case IVD_CMD_VIDEO_CTL: + u4_error_code = impeg2d_api_ctl(ps_dechdl,(void *)pv_api_ip,(void *)pv_api_op); + break; + + default: + break; + } + + return(u4_error_code); + 
+} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_num_mem_rec */ +/* */ +/* Description : The function get the number mem records library needs */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Just prints error message to console */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 23 09 2010 Hamsalekha Creation */ +/* */ +/*****************************************************************************/ + + +IV_API_CALL_STATUS_T impeg2d_api_num_mem_rec(void *pv_api_ip,void *pv_api_op) +{ + /* To Query No of Memory Records */ + impeg2d_num_mem_rec_ip_t *ps_query_mem_rec_ip; + impeg2d_num_mem_rec_op_t *ps_query_mem_rec_op; + + ps_query_mem_rec_ip = (impeg2d_num_mem_rec_ip_t *)pv_api_ip; + ps_query_mem_rec_op = (impeg2d_num_mem_rec_op_t *)pv_api_op; + + UNUSED(ps_query_mem_rec_ip); + ps_query_mem_rec_op->s_ivd_num_mem_rec_op_t.u4_size = sizeof(impeg2d_num_mem_rec_op_t); + + ps_query_mem_rec_op->s_ivd_num_mem_rec_op_t.u4_num_mem_rec = (UWORD32)NUM_MEM_RECORDS; + + ps_query_mem_rec_op->s_ivd_num_mem_rec_op_t.u4_error_code = IV_SUCCESS; + + + return(IV_SUCCESS); + +} + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_fill_mem_rec */ +/* */ +/* Description : Thsi functions fills details of each mem record lib needs*/ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Just prints error message to console */ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 23 09 2010 Hamsalekha Creation */ +/* */ 
+/*****************************************************************************/
+
+
+IV_API_CALL_STATUS_T impeg2d_api_fill_mem_rec(void *pv_api_ip,void *pv_api_op)
+{
+    /* The generic API pointers only need typing here; the record details */
+    /* themselves are produced by impeg2d_fill_mem_rec()                  */
+    impeg2d_fill_mem_rec_ip_t *ps_mem_q_ip = pv_api_ip;
+    impeg2d_fill_mem_rec_op_t *ps_mem_q_op = pv_api_op;
+
+    impeg2d_fill_mem_rec(ps_mem_q_ip, ps_mem_q_op);
+
+    return(IV_SUCCESS);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_init                                         */
+/*                                                                           */
+/*  Description   : Maps the caller supplied memory records onto the         */
+/*                  decoder contexts and initialises those contexts          */
+/*  Inputs        : ps_dechdl - handle that receives the codec context       */
+/*                  ps_ip     - impeg2d_init_ip_t (records, dimensions)      */
+/*                  ps_op     - impeg2d_init_op_t (size, error code)         */
+/*  Globals       : None                                                     */
+/*  Processing    : Zeroes all records but the first, assigns them to the    */
+/*                  context fields in order, seeds per-thread state and      */
+/*                  keeps a copy of the record table for retrieval           */
+/*  Outputs       : ps_dechdl and ps_op are updated                          */
+/*  Returns       : IV_SUCCESS, or IV_FAIL with u4_error_code set            */
+/*                                                                           */
+/*  Issues        : See NOTE(review) comments in the body                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         17 09 2007  Rajendra C Y          Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl,
+                                      void *ps_ip,
+                                      void *ps_op)
+{
+    UWORD32 i;
+    void *pv;
+
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+    UWORD32 u4_num_mem_rec;
+    iv_mem_rec_t *ps_mem_rec;
+    iv_obj_t *ps_dec_handle;
+    WORD32 i4_max_wd, i4_max_ht;
+    UWORD32 u4_max_frm_width, u4_max_frm_height;
+
+    impeg2d_init_ip_t *ps_dec_init_ip;
+    impeg2d_init_op_t *ps_dec_init_op;
+    WORD32 i4_num_threads;
+    UWORD32 u4_share_disp_buf, u4_chroma_format;
+    UWORD32 u4_mem_tab_size;
+
+    ps_dec_init_ip = (impeg2d_init_ip_t *)ps_ip;
+    ps_dec_init_op = (impeg2d_init_op_t *)ps_op;
+
+    /* Creation time dimensions after ALIGN16; computed once, used for the */
+    /* frame buffer layout and for every decoder context                   */
+    u4_max_frm_width  = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_wd);
+    u4_max_frm_height = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_ht);
+    i4_max_wd = u4_max_frm_width;
+    i4_max_ht = u4_max_frm_height;
+
+    /* Display buffer sharing is taken from the caller only when the input */
+    /* structure is large enough to hold the field and LOGO_EN is not      */
+    /* defined; otherwise it stays disabled                                */
+    u4_share_disp_buf = 0;
+#ifndef LOGO_EN
+    if(ps_dec_init_ip->s_ivd_init_ip_t.u4_size > offsetof(impeg2d_init_ip_t, u4_share_disp_buf))
+    {
+        u4_share_disp_buf = ps_dec_init_ip->u4_share_disp_buf;
+    }
+#endif
+
+    /* Sharing is also switched off for any output format other than */
+    /* 420P / 420SP_UV / 420SP_VU                                    */
+    u4_chroma_format = ps_dec_init_ip->s_ivd_init_ip_t.e_output_format;
+    if((u4_chroma_format != IV_YUV_420P) &&
+       (u4_chroma_format != IV_YUV_420SP_UV) &&
+       (u4_chroma_format != IV_YUV_420SP_VU))
+    {
+        u4_share_disp_buf = 0;
+    }
+
+    ps_dec_init_op->s_ivd_init_op_t.u4_size = sizeof(impeg2d_init_op_t);
+
+    /* Except memTab[0], all other memTabs are initialized to zero */
+    ps_mem_rec = ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+    for(i = 1; i < ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++)
+    {
+        memset(ps_mem_rec[i].pv_base, 0, ps_mem_rec[i].u4_mem_size);
+    }
+
+    /* memTab[0] is for codec Handle,redundant currently not being used */
+    ps_dec_handle = ps_mem_rec->pv_base;
+    u4_num_mem_rec = 1;
+    ps_mem_rec++;
+
+    /* Next record: multi-core decoder state, which is the codec handle */
+    ps_dec_state_multi_core = ps_mem_rec->pv_base;
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+    ps_dec_handle->pv_codec_handle = (void *)ps_dec_state_multi_core;
+    ps_dechdl->pv_codec_handle = (void *)ps_dec_state_multi_core;
+    ps_dechdl->pv_fxns = (void *)impeg2d_api_function;
+
+    /* Four records per thread: context, thread handle, MC buffers, */
+    /* stack context                                                */
+    for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
+    {
+        /* For MPEG2 Decoder Context */
+        ps_dec_state = ps_mem_rec->pv_base;
+        ps_dec_state_multi_core->ps_dec_state[i4_num_threads] = ps_dec_state;
+        ps_dec_state->ps_dec_state_multi_core = ps_dec_state_multi_core;
+        ps_dec_state->i4_num_cores = 1;
+        u4_num_mem_rec++;
+        ps_mem_rec++;
+
+        /* Thread handle */
+        ps_dec_state->pv_codec_thread_handle = ps_mem_rec->pv_base;
+        u4_num_mem_rec++;
+        ps_mem_rec++;
+
+        /* For Motion Compensation Buffers: a single record is carved into */
+        /* forward, backward and result Y/U/V areas, in that order         */
+        pv = ps_mem_rec->pv_base;
+
+        ps_dec_state->s_mc_fw_buf.pu1_y = pv;
+        pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+        ps_dec_state->s_mc_fw_buf.pu1_u = pv;
+        pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+        ps_dec_state->s_mc_fw_buf.pu1_v = pv;
+        pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+        ps_dec_state->s_mc_bk_buf.pu1_y = pv;
+        pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+        ps_dec_state->s_mc_bk_buf.pu1_u = pv;
+        pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+        ps_dec_state->s_mc_bk_buf.pu1_v = pv;
+        pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+        ps_dec_state->s_mc_buf.pu1_y = pv;
+        pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+        ps_dec_state->s_mc_buf.pu1_u = pv;
+        pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+        ps_dec_state->s_mc_buf.pu1_v = pv;
+
+        u4_num_mem_rec++;
+        ps_mem_rec++;
+
+        ps_dec_state->pv_pic_buf_mg = 0;
+
+        /* For saving stack context to support global error handling */
+        ps_dec_state->pv_stack_cntxt = ps_mem_rec->pv_base;
+        u4_num_mem_rec++;
+        ps_mem_rec++;
+    }
+
+    /* For Picture Buffer Manager: owned by the first context only. The   */
+    /* picture buffer descriptors start right behind the manager struct. */
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    ps_dec_state->pv_pic_buf_mg = ps_mem_rec->pv_base;
+    ps_dec_state->pv_pic_buf_base = (UWORD8 *)ps_mem_rec->pv_base + sizeof(buf_mgr_t);
+
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+    /* Initial state of every decoder context */
+    for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
+    {
+        ps_dec_state = ps_dec_state_multi_core->ps_dec_state[i4_num_threads];
+
+        ps_dec_state->u2_header_done = 0;  /* Header decoding not done */
+
+        ps_dec_state->u2_create_max_width  = u4_max_frm_width;
+        ps_dec_state->u2_create_max_height = u4_max_frm_height;
+
+        ps_dec_state->i4_chromaFormat = ps_dec_init_ip->s_ivd_init_ip_t.e_output_format;
+        ps_dec_state->u4_frm_buf_stride = 0;
+        ps_dec_state->u2_frame_width = u4_max_frm_width;
+        ps_dec_state->u2_picture_width = u4_max_frm_width;
+        ps_dec_state->u2_horizontal_size = u4_max_frm_width;
+
+        ps_dec_state->u2_frame_height = u4_max_frm_height;
+        ps_dec_state->u2_vertical_size = u4_max_frm_height;
+        ps_dec_state->u4_share_disp_buf = u4_share_disp_buf;
+    }
+
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    /* 422ILE output is rejected when the height is odd */
+    if((ps_dec_state->i4_chromaFormat == IV_YUV_422ILE)
+        &&((ps_dec_state->u2_vertical_size & 0x1) != 0))
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_CHROMA_FORMAT_HEIGHT_ERROR;
+        return(IV_FAIL);
+    }
+
+    /* picture buffer manager initialization will be done only for first thread */
+    impeg2_disp_mgr_init(&ps_dec_state->s_disp_mgr);
+    impeg2_buf_mgr_init((buf_mgr_t *)ps_dec_state->pv_pic_buf_mg);
+
+    /*************************************************************************/
+    /*             Internal Frame Buffers                                    */
+    /*************************************************************************/
+
+    /* Set first frame to grey */
+    memset(ps_mem_rec->pv_base, 128, ps_mem_rec->u4_mem_size);
+
+    if(0 == ps_dec_state->u4_share_disp_buf)
+    {
+        /* One record per internal frame: Y plane followed by U and V, */
+        /* each chroma plane a quarter of the luma size                */
+        pic_buf_t *ps_pic_buf;
+        ps_pic_buf = (pic_buf_t *)ps_dec_state->pv_pic_buf_base;
+        for(i = 0; i < NUM_INT_FRAME_BUFFERS; i++)
+        {
+            UWORD8 *pu1_buf;
+            pu1_buf = ps_mem_rec->pv_base;
+
+            ps_pic_buf->pu1_y = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd;
+
+            ps_pic_buf->pu1_u = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd >> 2;
+
+            ps_pic_buf->pu1_v = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd >> 2;
+
+            ps_pic_buf->i4_buf_id = i;
+            ps_pic_buf->u1_used_as_ref = 0;
+            ps_pic_buf->u4_ts = 0;
+
+            impeg2_buf_mgr_add(ps_dec_state->pv_pic_buf_mg, ps_pic_buf, i);
+            ps_mem_rec++;
+            ps_pic_buf++;
+        }
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+    else if (ps_dec_state->i4_chromaFormat != IV_YUV_420P)
+    {
+        /* Shared display buffers with a non-420P output: the records are */
+        /* kept as chroma reference buffers                               */
+        for(i = 0; i < NUM_INT_FRAME_BUFFERS; i++)
+        {
+            ps_dec_state->pu1_chroma_ref_buf[i] = ps_mem_rec->pv_base;
+            ps_mem_rec++;
+        }
+
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+    else
+    {
+        /* Shared display buffers with 420P output: records are skipped */
+        ps_mem_rec += NUM_INT_FRAME_BUFFERS;
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+
+
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    /* Job queue record.                                                    */
+    /* NOTE(review): this record is consumed without incrementing           */
+    /* u4_num_mem_rec, so the count stored below is one less than the       */
+    /* number of records walked - confirm against NUM_MEM_RECORDS.          */
+    ps_dec_state->pv_jobq_buf = ps_mem_rec->pv_base;
+    ps_dec_state->i4_jobq_buf_size = ps_mem_rec->u4_mem_size;
+    ps_mem_rec++;
+
+    /* NOTE(review): this check runs after the records have already been */
+    /* dereferenced; it relies on the caller having validated the count. */
+    if(u4_num_mem_rec > ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec)
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_NUM_MEM_REC_NOT_SUFFICIENT;
+        return(IV_FAIL);
+    }
+
+    ps_dec_state->u1_flushfrm = 0;
+    ps_dec_state->u1_flushcnt = 0;
+    ps_dec_state->pv_jobq = impeg2_jobq_init(ps_dec_state->pv_jobq_buf, ps_dec_state->i4_jobq_buf_size);
+
+
+    /*************************************************************************/
+    /*        Last MemTab is used for storing TabRecords                     */
+    /*************************************************************************/
+    ps_dec_state->pv_memTab = (void *)ps_mem_rec->pv_base;
+
+    /* Copy no more than the caller's record array actually holds: the     */
+    /* destination record may be larger than required, and copying its full */
+    /* size would read past the end of the source array.                    */
+    u4_mem_tab_size = ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec * (UWORD32)sizeof(iv_mem_rec_t);
+    if(u4_mem_tab_size > ps_mem_rec->u4_mem_size)
+    {
+        u4_mem_tab_size = ps_mem_rec->u4_mem_size;
+    }
+    memcpy(ps_mem_rec->pv_base, ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location, u4_mem_tab_size);
+
+    /* Updating in Decoder Context with memRecords  */
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+    ps_dec_state->u4_num_mem_records = u4_num_mem_rec;
+
+
+    ps_dec_state->u4_num_frames_decoded = 0;
+    ps_dec_state->aps_ref_pics[0] = NULL;
+    ps_dec_state->aps_ref_pics[1] = NULL;
+
+    ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IV_SUCCESS;
+    impeg2d_init_arch(ps_dec_state);
+
+    impeg2d_init_function_ptr(ps_dec_state);
+
+    return(IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_retrieve_mem_rec                             */
+/*                                                                           */
+/*  Description   : Returns the memory records saved by init to the caller   */
+/*                  and de-initialises the job queue                         */
+/*                                                                           */
+/*  Inputs        : ps_dechdl - decoder handle                               */
+/*                  pv_api_ip - impeg2d_retrieve_mem_rec_ip_t                */
+/*                  pv_api_op - impeg2d_retrieve_mem_rec_op_t                */
+/*  Globals       : <Does it use any global variables?>                      */
+/*  Outputs       : Caller's record array and the output structure           */
+/*  Returns       : IV_SUCCESS                                               */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_retrieve_mem_rec(iv_obj_t *ps_dechdl,
+                                                  void *pv_api_ip,
+                                                  void *pv_api_op)
+{
+    UWORD32 u4_i;
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+    iv_mem_rec_t *ps_mem_rec;
+    
iv_mem_rec_t *ps_temp_rec; + + + + impeg2d_retrieve_mem_rec_ip_t *ps_retr_mem_rec_ip; + impeg2d_retrieve_mem_rec_op_t *ps_retr_mem_rec_op; + + ps_retr_mem_rec_ip = (impeg2d_retrieve_mem_rec_ip_t *)pv_api_ip; + ps_retr_mem_rec_op = (impeg2d_retrieve_mem_rec_op_t *)pv_api_op; + + ps_mem_rec = ps_retr_mem_rec_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location; + ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle); + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + ps_temp_rec = ps_dec_state->pv_memTab; + + for(u4_i = 0; u4_i < (ps_dec_state->u4_num_mem_records);u4_i++) + { + ps_mem_rec[u4_i].u4_mem_size = ps_temp_rec[u4_i].u4_mem_size; + ps_mem_rec[u4_i].u4_mem_alignment = ps_temp_rec[u4_i].u4_mem_alignment; + ps_mem_rec[u4_i].e_mem_type = ps_temp_rec[u4_i].e_mem_type; + ps_mem_rec[u4_i].pv_base = ps_temp_rec[u4_i].pv_base; + } + + ps_retr_mem_rec_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = IV_SUCCESS; + ps_retr_mem_rec_op->s_ivd_retrieve_mem_rec_op_t.u4_num_mem_rec_filled = ps_dec_state->u4_num_mem_records; + + impeg2_jobq_deinit(ps_dec_state->pv_jobq); + IMPEG2D_PRINT_STATISTICS(); + + + return(IV_SUCCESS); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_ctl */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_ctl(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + WORD32 i4_sub_cmd; + UWORD32 *pu4_api_ip; + IV_API_CALL_STATUS_T u4_error_code; + + pu4_api_ip = (UWORD32 *)pv_api_ip; + i4_sub_cmd = *(pu4_api_ip + 2); + + switch(i4_sub_cmd) + { + case 
IVD_CMD_CTL_GETPARAMS: + u4_error_code = impeg2d_api_get_status(ps_dechdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_SETPARAMS: + u4_error_code = impeg2d_api_set_params(ps_dechdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_RESET: + u4_error_code = impeg2d_api_reset(ps_dechdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_SETDEFAULT: + u4_error_code = impeg2d_api_set_default(ps_dechdl, + (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_FLUSH: + u4_error_code = impeg2d_api_set_flush_mode(ps_dechdl, + (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_GETBUFINFO: + u4_error_code = impeg2d_api_get_buf_info(ps_dechdl, + (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IVD_CMD_CTL_GETVERSION: + u4_error_code = impeg2d_api_get_version(ps_dechdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IMPEG2D_CMD_CTL_SET_NUM_CORES: + u4_error_code = impeg2d_api_set_num_cores(ps_dechdl, + (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS: + u4_error_code = impeg2d_get_frame_dimensions(ps_dechdl, + (void *)pv_api_ip, + (void *)pv_api_op); + break; + + case IMPEG2D_CMD_CTL_SET_PROCESSOR: + u4_error_code = impeg2d_set_processor(ps_dechdl, (void *)pv_api_ip, + (void *)pv_api_op); + break; + + default: + u4_error_code = IV_FAIL; + break; + } + + return (u4_error_code); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_check_struct_sanity */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ 
+/*****************************************************************************/ +IV_API_CALL_STATUS_T impeg2d_api_check_struct_sanity(iv_obj_t *ps_handle, + void *pv_api_ip, + void *pv_api_op) +{ + WORD32 i4_cmd; + UWORD32 *pu4_api_ip; + UWORD32 *pu4_api_op; + WORD32 i,j; + + if(NULL == pv_api_op) + return(IV_FAIL); + + if(NULL == pv_api_ip) + return(IV_FAIL); + + pu4_api_ip = (UWORD32 *)pv_api_ip; + pu4_api_op = (UWORD32 *)pv_api_op; + i4_cmd = (IVD_API_COMMAND_TYPE_T)*(pu4_api_ip + 1); + + /* error checks on handle */ + switch(i4_cmd) + { + case IV_CMD_GET_NUM_MEM_REC: + case IV_CMD_FILL_NUM_MEM_REC: + break; + case IV_CMD_INIT: + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + break; + case IVD_CMD_GET_DISPLAY_FRAME: + case IVD_CMD_VIDEO_DECODE: + case IV_CMD_RETRIEVE_MEMREC: + case IVD_CMD_SET_DISPLAY_FRAME: + case IVD_CMD_REL_DISPLAY_FRAME: + case IVD_CMD_VIDEO_CTL: + { + if(ps_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->u4_size != sizeof(iv_obj_t)) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_handle->pv_fxns != impeg2d_api_function) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL; + return IV_FAIL; + } + + if(ps_handle->pv_codec_handle == NULL) + { + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL; + return IV_FAIL; + } + } + break; + default: + *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_INVALID_API_CMD; + return IV_FAIL; + } + + switch(i4_cmd) + { + case 
IV_CMD_GET_NUM_MEM_REC: + { + impeg2d_num_mem_rec_ip_t *ps_ip = (impeg2d_num_mem_rec_ip_t *)pv_api_ip; + impeg2d_num_mem_rec_op_t *ps_op = (impeg2d_num_mem_rec_op_t *)pv_api_op; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_num_mem_rec_ip_t.u4_size != sizeof(impeg2d_num_mem_rec_ip_t)) + { + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_num_mem_rec_op_t.u4_size != sizeof(impeg2d_num_mem_rec_op_t)) + { + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + } + break; + case IV_CMD_FILL_NUM_MEM_REC: + { + impeg2d_fill_mem_rec_ip_t *ps_ip = (impeg2d_fill_mem_rec_ip_t *)pv_api_ip; + impeg2d_fill_mem_rec_op_t *ps_op = (impeg2d_fill_mem_rec_op_t *)pv_api_op; + iv_mem_rec_t *ps_mem_rec; + + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size != sizeof(impeg2d_fill_mem_rec_ip_t)) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_fill_mem_rec_op_t.u4_size != sizeof(impeg2d_fill_mem_rec_op_t)) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd < MIN_WIDTH) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_WIDTH_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd > MAX_WIDTH) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + 
ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_WIDTH_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht < MIN_HEIGHT) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht > MAX_HEIGHT) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(NULL == ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_NUM_REC_NOT_SUFFICIENT; + return(IV_FAIL); + } + + /* check memrecords sizes are correct */ + ps_mem_rec = ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location; + for(i=0;i<NUM_MEM_RECORDS;i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + } + break; + + case IV_CMD_INIT: + { + impeg2d_init_ip_t *ps_ip = (impeg2d_init_ip_t *)pv_api_ip; + impeg2d_init_op_t *ps_op = (impeg2d_init_op_t *)pv_api_op; + iv_mem_rec_t *ps_mem_rec; + UWORD32 u4_tot_num_mem_recs; + + ps_op->s_ivd_init_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_init_ip_t.u4_size != sizeof(impeg2d_init_ip_t)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_init_op_t.u4_size != sizeof(impeg2d_init_op_t)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + 
return(IV_FAIL); + } + + u4_tot_num_mem_recs = NUM_MEM_RECORDS; + + + + + if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec > u4_tot_num_mem_recs) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_NOT_SUFFICIENT; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_init_ip_t.u4_frm_max_wd < MIN_WIDTH) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_init_ip_t.u4_frm_max_wd > MAX_WIDTH) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_init_ip_t.u4_frm_max_ht < MIN_HEIGHT) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_init_ip_t.u4_frm_max_ht > MAX_HEIGHT) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED; + return(IV_FAIL); + } + + if(NULL == ps_ip->s_ivd_init_ip_t.pv_mem_rec_location) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_NUM_REC_NOT_SUFFICIENT; + return(IV_FAIL); + } + + if((ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420P) && + (ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_422ILE)&&(ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420SP_UV)&&(ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420SP_VU)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED; + return(IV_FAIL); + } + + /* verify number of mem records */ + if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec < NUM_MEM_RECORDS) + { + 
ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT; + return IV_FAIL; + } + + ps_mem_rec = ps_ip->s_ivd_init_ip_t.pv_mem_rec_location; + /* verify wether first memrecord is handle or not */ + /* + if(ps_mem_rec->pv_base != ps_handle) + { + // indicate the incorrect handle error + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INVALID_HANDLE; + return IV_FAIL; + } +*/ + /* check memrecords sizes are correct */ + for(i=0;i < (WORD32)ps_ip->s_ivd_init_ip_t.u4_num_mem_rec ; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + + /* verify memtabs for overlapping regions */ + { + UWORD8 *pau1_start[NUM_MEM_RECORDS]; + UWORD8 *pau1_end[NUM_MEM_RECORDS]; + + + pau1_start[0] = (UWORD8 *)(ps_mem_rec[0].pv_base); + pau1_end[0] = (UWORD8 *)(ps_mem_rec[0].pv_base) + ps_mem_rec[0].u4_mem_size - 1; + for(i = 1; i < (WORD32)ps_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++) + { + /* This array is populated to check memtab overlapp */ + pau1_start[i] = (UWORD8 *)(ps_mem_rec[i].pv_base); + pau1_end[i] = (UWORD8 *)(ps_mem_rec[i].pv_base) + ps_mem_rec[i].u4_mem_size - 1; + + for(j = 0; j < i; j++) + { + if((pau1_start[i] >= pau1_start[j]) && (pau1_start[i] <= pau1_end[j])) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if((pau1_end[i] >= pau1_start[j]) && (pau1_end[i] <= pau1_end[j])) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + + if((pau1_start[i] < pau1_start[j]) && (pau1_end[i] > pau1_end[j])) + { 
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR; + return IV_FAIL; + } + } + } + } + + + + + { + iv_mem_rec_t as_mem_rec_ittiam_api[NUM_MEM_RECORDS]; + + impeg2d_fill_mem_rec_ip_t s_fill_mem_rec_ip; + impeg2d_fill_mem_rec_op_t s_fill_mem_rec_op; + IV_API_CALL_STATUS_T e_status; + WORD32 i4_num_memrec; + { + + iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip; + iv_num_mem_rec_op_t s_no_of_mem_rec_query_op; + + + s_no_of_mem_rec_query_ip.u4_size = sizeof(iv_num_mem_rec_ip_t); + s_no_of_mem_rec_query_op.u4_size = sizeof(iv_num_mem_rec_op_t); + + s_no_of_mem_rec_query_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC; + impeg2d_api_function(NULL, + (void *)&s_no_of_mem_rec_query_ip, + (void *)&s_no_of_mem_rec_query_op); + + i4_num_memrec = s_no_of_mem_rec_query_op.u4_num_mem_rec; + + + + } + + + /* initialize mem records array with sizes */ + for(i = 0; i < i4_num_memrec; i++) + { + as_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t); + } + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size = sizeof(impeg2d_fill_mem_rec_ip_t); + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd = IV_CMD_FILL_NUM_MEM_REC; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd = ps_ip->s_ivd_init_ip_t.u4_frm_max_wd; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht = ps_ip->s_ivd_init_ip_t.u4_frm_max_ht; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location = as_mem_rec_ittiam_api; + s_fill_mem_rec_ip.u4_share_disp_buf = ps_ip->u4_share_disp_buf; + s_fill_mem_rec_ip.e_output_format = ps_ip->s_ivd_init_ip_t.e_output_format; + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size = sizeof(impeg2d_fill_mem_rec_op_t); + + + e_status = impeg2d_api_function(NULL, + (void *)&s_fill_mem_rec_ip, + (void *)&s_fill_mem_rec_op); + if(IV_FAIL == e_status) + { + ps_op->s_ivd_init_op_t.u4_error_code = s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code; + return(IV_FAIL); + } + + + + for(i = 0; i < 
i4_num_memrec; i ++) + { + if(ps_mem_rec[i].pv_base == NULL) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_BASE_NULL; + return IV_FAIL; + } +#ifdef CHECK_ALIGN + + if((UWORD32)(ps_mem_rec[i].pv_base) & (ps_mem_rec[i].u4_mem_alignment - 1)) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR; + return IV_FAIL; + } +#endif //CHECK_ALIGN + if(ps_mem_rec[i].u4_mem_alignment != as_mem_rec_ittiam_api[i].u4_mem_alignment) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR; + return IV_FAIL; + } + + if(ps_mem_rec[i].u4_mem_size < as_mem_rec_ittiam_api[i].u4_mem_size) + { + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE; + return IV_FAIL; + } + + if(ps_mem_rec[i].e_mem_type != as_mem_rec_ittiam_api[i].e_mem_type) + { + if (IV_EXTERNAL_CACHEABLE_SCRATCH_MEM == as_mem_rec_ittiam_api[i].e_mem_type) + { + if (IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM == ps_mem_rec[i].e_mem_type) + { + continue; + } + } + ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE; + return IV_FAIL; + } + } + } + + + } + break; + + case IVD_CMD_GET_DISPLAY_FRAME: + { + impeg2d_get_display_frame_ip_t *ps_ip = (impeg2d_get_display_frame_ip_t *)pv_api_ip; + impeg2d_get_display_frame_op_t *ps_op = (impeg2d_get_display_frame_op_t *)pv_api_op; + + ps_op->s_ivd_get_display_frame_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_get_display_frame_ip_t.u4_size != sizeof(impeg2d_get_display_frame_ip_t)) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 
IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_get_display_frame_op_t.u4_size != sizeof(impeg2d_get_display_frame_op_t)) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_num_bufs == 0) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(i = 0; i< (WORD32)ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_num_bufs;i++) + { + if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.pu1_bufs[i] == NULL) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL; + return IV_FAIL; + } + + if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_min_out_buf_size[i] == 0) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + /* + if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_min_out_buf_size[i] == 0) + { + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + */ + } + } + break; + case IVD_CMD_REL_DISPLAY_FRAME: + { + impeg2d_rel_display_frame_ip_t *ps_ip = (impeg2d_rel_display_frame_ip_t *)pv_api_ip; + impeg2d_rel_display_frame_op_t *ps_op = (impeg2d_rel_display_frame_op_t *)pv_api_op; + + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code = 0; + + if ((ps_ip->s_ivd_rel_display_frame_ip_t.u4_size != sizeof(impeg2d_rel_display_frame_ip_t)) + && (ps_ip->s_ivd_rel_display_frame_ip_t.u4_size != 
sizeof(ivd_rel_display_frame_ip_t))) + { + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if((ps_op->s_ivd_rel_display_frame_op_t.u4_size != sizeof(impeg2d_rel_display_frame_op_t)) && + (ps_op->s_ivd_rel_display_frame_op_t.u4_size != sizeof(ivd_rel_display_frame_op_t))) + { + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + } + break; + + + case IVD_CMD_SET_DISPLAY_FRAME: + { + impeg2d_set_display_frame_ip_t *ps_ip = (impeg2d_set_display_frame_ip_t *)pv_api_ip; + impeg2d_set_display_frame_op_t *ps_op = (impeg2d_set_display_frame_op_t *)pv_api_op; + UWORD32 j, i; + + ps_op->s_ivd_set_display_frame_op_t.u4_error_code = 0; + + if ((ps_ip->s_ivd_set_display_frame_ip_t.u4_size != sizeof(impeg2d_set_display_frame_ip_t)) + && (ps_ip->s_ivd_set_display_frame_ip_t.u4_size != sizeof(ivd_set_display_frame_ip_t))) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if((ps_op->s_ivd_set_display_frame_op_t.u4_size != sizeof(impeg2d_set_display_frame_op_t)) && + (ps_op->s_ivd_set_display_frame_op_t.u4_size != sizeof(ivd_set_display_frame_op_t))) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(j = 0; j < 
ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs; j++) + { + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS; + return IV_FAIL; + } + + for(i=0;i< ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs;i++) + { + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].pu1_bufs[i] == NULL) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL; + return IV_FAIL; + } + + if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_min_out_buf_size[i] == 0) + { + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE; + return IV_FAIL; + } + } + } + } + break; + + case IVD_CMD_VIDEO_DECODE: + { + impeg2d_video_decode_ip_t *ps_ip = (impeg2d_video_decode_ip_t *)pv_api_ip; + impeg2d_video_decode_op_t *ps_op = (impeg2d_video_decode_op_t *)pv_api_op; + + ps_op->s_ivd_video_decode_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_video_decode_ip_t.u4_size != sizeof(impeg2d_video_decode_ip_t)) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_video_decode_op_t.u4_size != sizeof(impeg2d_video_decode_op_t)) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + } + break; + + case IV_CMD_RETRIEVE_MEMREC: + { + impeg2d_retrieve_mem_rec_ip_t *ps_ip = (impeg2d_retrieve_mem_rec_ip_t *)pv_api_ip; + impeg2d_retrieve_mem_rec_op_t *ps_op = (impeg2d_retrieve_mem_rec_op_t *)pv_api_op; + 
iv_mem_rec_t *ps_mem_rec; + + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_retrieve_mem_rec_ip_t.u4_size != sizeof(impeg2d_retrieve_mem_rec_ip_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + if(ps_op->s_ivd_retrieve_mem_rec_op_t.u4_size != sizeof(impeg2d_retrieve_mem_rec_op_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return(IV_FAIL); + } + + ps_mem_rec = ps_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location; + /* check memrecords sizes are correct */ + for(i=0;i < NUM_MEM_RECORDS ; i++) + { + if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t)) + { + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + } + break; + + case IVD_CMD_VIDEO_CTL: + { + UWORD32 *pu4_ptr_cmd; + UWORD32 u4_sub_command; + + pu4_ptr_cmd = (UWORD32 *)pv_api_ip; + pu4_ptr_cmd += 2; + u4_sub_command = *pu4_ptr_cmd; + + switch(u4_sub_command) + { + case IVD_CMD_CTL_SETPARAMS: + { + impeg2d_ctl_set_config_ip_t *ps_ip; + impeg2d_ctl_set_config_op_t *ps_op; + ps_ip = (impeg2d_ctl_set_config_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_set_config_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_set_config_ip_t.u4_size != sizeof(impeg2d_ctl_set_config_ip_t)) + { + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + case IVD_CMD_CTL_SETDEFAULT: + { + impeg2d_ctl_set_config_op_t *ps_op; + ps_op = (impeg2d_ctl_set_config_op_t *)pv_api_op; + 
ps_op->s_ivd_ctl_set_config_op_t.u4_error_code = 0; + + if(ps_op->s_ivd_ctl_set_config_op_t.u4_size != sizeof(impeg2d_ctl_set_config_op_t)) + { + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_GETPARAMS: + { + impeg2d_ctl_getstatus_ip_t *ps_ip; + impeg2d_ctl_getstatus_op_t *ps_op; + + ps_ip = (impeg2d_ctl_getstatus_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_getstatus_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_getstatus_ip_t.u4_size != sizeof(impeg2d_ctl_getstatus_ip_t)) + { + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getstatus_op_t.u4_size != sizeof(impeg2d_ctl_getstatus_op_t)) + { + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_GETBUFINFO: + { + impeg2d_ctl_getbufinfo_ip_t *ps_ip; + impeg2d_ctl_getbufinfo_op_t *ps_op; + ps_ip = (impeg2d_ctl_getbufinfo_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_getbufinfo_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_getbufinfo_ip_t.u4_size != sizeof(impeg2d_ctl_getbufinfo_ip_t)) + { + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size != sizeof(impeg2d_ctl_getbufinfo_op_t)) + { + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; 
+ } + } + break; + + case IVD_CMD_CTL_GETVERSION: + { + impeg2d_ctl_getversioninfo_ip_t *ps_ip; + impeg2d_ctl_getversioninfo_op_t *ps_op; + ps_ip = (impeg2d_ctl_getversioninfo_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_getversioninfo_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_size != sizeof(impeg2d_ctl_getversioninfo_ip_t)) + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_getversioninfo_op_t.u4_size != sizeof(impeg2d_ctl_getversioninfo_op_t)) + { + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_FLUSH: + { + impeg2d_ctl_flush_ip_t *ps_ip; + impeg2d_ctl_flush_op_t *ps_op; + ps_ip = (impeg2d_ctl_flush_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_flush_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_flush_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_flush_ip_t.u4_size != sizeof(impeg2d_ctl_flush_ip_t)) + { + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_flush_op_t.u4_size != sizeof(impeg2d_ctl_flush_op_t)) + { + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IVD_CMD_CTL_RESET: + { + impeg2d_ctl_reset_ip_t *ps_ip; + impeg2d_ctl_reset_op_t *ps_op; + ps_ip = (impeg2d_ctl_reset_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_reset_op_t *)pv_api_op; + + ps_op->s_ivd_ctl_reset_op_t.u4_error_code = 0; + + if(ps_ip->s_ivd_ctl_reset_ip_t.u4_size != 
sizeof(impeg2d_ctl_reset_ip_t)) + { + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + if(ps_op->s_ivd_ctl_reset_op_t.u4_size != sizeof(impeg2d_ctl_reset_op_t)) + { + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + } + break; + + case IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS: + { + impeg2d_ctl_get_frame_dimensions_ip_t *ps_ip; + impeg2d_ctl_get_frame_dimensions_op_t *ps_op; + + ps_ip = + (impeg2d_ctl_get_frame_dimensions_ip_t *)pv_api_ip; + ps_op = + (impeg2d_ctl_get_frame_dimensions_op_t *)pv_api_op; + + if(ps_ip->u4_size + != sizeof(impeg2d_ctl_get_frame_dimensions_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size + != sizeof(impeg2d_ctl_get_frame_dimensions_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + + case IMPEG2D_CMD_CTL_SET_NUM_CORES: + { + impeg2d_ctl_set_num_cores_ip_t *ps_ip; + impeg2d_ctl_set_num_cores_op_t *ps_op; + + ps_ip = (impeg2d_ctl_set_num_cores_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_set_num_cores_op_t *)pv_api_op; + + if(ps_ip->u4_size + != sizeof(impeg2d_ctl_set_num_cores_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size + != sizeof(impeg2d_ctl_set_num_cores_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + +#ifdef MULTICORE + if((ps_ip->u4_num_cores < 1) || (ps_ip->u4_num_cores > MAX_THREADS)) +#else + if(ps_ip->u4_num_cores != 1) +#endif + 
{ + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + return IV_FAIL; + } + break; + } + case IMPEG2D_CMD_CTL_SET_PROCESSOR: + { + impeg2d_ctl_set_processor_ip_t *ps_ip; + impeg2d_ctl_set_processor_op_t *ps_op; + + ps_ip = (impeg2d_ctl_set_processor_ip_t *)pv_api_ip; + ps_op = (impeg2d_ctl_set_processor_op_t *)pv_api_op; + + if(ps_ip->u4_size + != sizeof(impeg2d_ctl_set_processor_ip_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_IP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + if(ps_op->u4_size + != sizeof(impeg2d_ctl_set_processor_op_t)) + { + ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_op->u4_error_code |= + IVD_OP_API_STRUCT_SIZE_INCORRECT; + return IV_FAIL; + } + + break; + } + default: + break; + + } + } + break; + + default: + { *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM; + *(pu4_api_op + 1) |= IVD_UNSUPPORTED_API_CMD; + return IV_FAIL; + } + + + } + + return IV_SUCCESS; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_api_entity */ +/* */ +/* Description : */ +/* */ +/* Inputs : */ +/* Globals : <Does it use any global variables?> */ +/* Outputs : */ +/* Returns : void */ +/* */ +/* Issues : none */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 22 10 2008 100356 Draft */ +/* */ +/*****************************************************************************/ + + +IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *ps_dechdl, + void *pv_api_ip, + void *pv_api_op) +{ + iv_obj_t *ps_dec_handle; + dec_state_t *ps_dec_state; + dec_state_multi_core_t *ps_dec_state_multi_core; + + impeg2d_video_decode_ip_t *ps_dec_ip; + + impeg2d_video_decode_op_t *ps_dec_op; + WORD32 bytes_remaining; + pic_buf_t *ps_disp_pic; + + + + ps_dec_ip = (impeg2d_video_decode_ip_t *)pv_api_ip; + ps_dec_op = (impeg2d_video_decode_op_t *)pv_api_op; + + 
memset(ps_dec_op,0,sizeof(impeg2d_video_decode_op_t)); + + ps_dec_op->s_ivd_video_decode_op_t.u4_size = sizeof(impeg2d_video_decode_op_t); + ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0; + bytes_remaining = ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + + ps_dec_handle = (iv_obj_t *)ps_dechdl; + + if(ps_dechdl == NULL) + { + return(IV_FAIL); + } + + + + ps_dec_state_multi_core = ps_dec_handle->pv_codec_handle; + ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0]; + + ps_dec_state->ps_disp_frm_buf = &(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf); + if(0 == ps_dec_state->u4_share_disp_buf) + { + ps_dec_state->ps_disp_frm_buf->pv_y_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0]; + ps_dec_state->ps_disp_frm_buf->pv_u_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1]; + ps_dec_state->ps_disp_frm_buf->pv_v_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2]; + } + + ps_dec_state->ps_disp_pic = NULL; + ps_dec_state->i4_frame_decoded = 0; + /*rest bytes consumed */ + ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0; + + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code = IV_SUCCESS; + + if((ps_dec_ip->s_ivd_video_decode_ip_t.pv_stream_buffer == NULL)&&(ps_dec_state->u1_flushfrm==0)) + { + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM; + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_DEC_FRM_BS_BUF_NULL; + return IV_FAIL; + } + + + if (ps_dec_state->u4_num_frames_decoded > NUM_FRAMES_LIMIT) + { + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code = IMPEG2D_SAMPLE_VERSION_LIMIT_ERR; + return(IV_FAIL); + } + + if(((0 == ps_dec_state->u2_header_done) || (ps_dec_state->u2_decode_header == 1)) && (ps_dec_state->u1_flushfrm == 0)) + { + impeg2d_dec_hdr(ps_dec_state,ps_dec_ip ,ps_dec_op); + bytes_remaining -= ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed; + } + + if((1 != ps_dec_state->u2_decode_header) && ((bytes_remaining > 0) || 
ps_dec_state->u1_flushfrm)) + { + if(ps_dec_state->u1_flushfrm) + { + if(ps_dec_state->aps_ref_pics[1] != NULL) + { + impeg2_disp_mgr_add(&ps_dec_state->s_disp_mgr, ps_dec_state->aps_ref_pics[1], ps_dec_state->aps_ref_pics[1]->i4_buf_id); + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[1]->i4_buf_id, BUF_MGR_REF); + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF); + + ps_dec_state->aps_ref_pics[1] = NULL; + ps_dec_state->aps_ref_pics[0] = NULL; + + } + else if(ps_dec_state->aps_ref_pics[0] != NULL) + { + impeg2_disp_mgr_add(&ps_dec_state->s_disp_mgr, ps_dec_state->aps_ref_pics[0], ps_dec_state->aps_ref_pics[0]->i4_buf_id); + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF); + + ps_dec_state->aps_ref_pics[0] = NULL; + } + ps_dec_ip->s_ivd_video_decode_ip_t.u4_size = sizeof(impeg2d_video_decode_ip_t); + ps_dec_op->s_ivd_video_decode_op_t.u4_size = sizeof(impeg2d_video_decode_op_t); + + ps_disp_pic = impeg2_disp_mgr_get(&ps_dec_state->s_disp_mgr, &ps_dec_state->i4_disp_buf_id); + + ps_dec_state->ps_disp_pic = ps_disp_pic; + if(ps_disp_pic == NULL) + { + ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0; + } + else + { + WORD32 fmt_conv; + if(0 == ps_dec_state->u4_share_disp_buf) + { + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_y_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0]; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1]; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2]; + fmt_conv = 1; + } + else + { + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_y_buf = ps_disp_pic->pu1_y; + if(IV_YUV_420P == ps_dec_state->i4_chromaFormat) + { + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf = ps_disp_pic->pu1_u; + 
ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf = ps_disp_pic->pu1_v; + fmt_conv = 0; + } + else + { + UWORD8 *pu1_buf; + + pu1_buf = ps_dec_state->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[1]; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf = pu1_buf; + + pu1_buf = ps_dec_state->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[2]; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf = pu1_buf; + fmt_conv = 1; + } + } + + if(fmt_conv == 1) + { + impeg2d_format_convert(ps_dec_state, ps_disp_pic, + &(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf), + 0, ps_dec_state->u2_vertical_size); + } + if(0 == ps_dec_state->u4_share_disp_buf) + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_disp_pic->i4_buf_id, BUF_MGR_DISP); + + ps_dec_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec_state->u2_vertical_size; + ps_dec_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec_state->u2_horizontal_size; + ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 1; + + ps_dec_op->s_ivd_video_decode_op_t.u4_disp_buf_id = ps_disp_pic->i4_buf_id; + ps_dec_op->s_ivd_video_decode_op_t.u4_ts = ps_disp_pic->u4_ts; + + ps_dec_op->s_ivd_video_decode_op_t.e_output_format = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat; + + ps_dec_op->s_ivd_video_decode_op_t.u4_is_ref_flag = (B_PIC != ps_dec_state->e_pic_type); + + ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag = IV_PROGRESSIVE; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_wd = ps_dec_state->u2_horizontal_size; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_strd = ps_dec_state->u4_frm_buf_stride; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_ht = ps_dec_state->u2_vertical_size; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = 
ps_dec_state->u2_vertical_size >> 1; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = ps_dec_state->u2_horizontal_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_strd = ps_dec_state->u4_frm_buf_stride >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = ps_dec_state->u2_vertical_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_size = sizeof(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf); + + switch(ps_dec_state->i4_chromaFormat) + { + case IV_YUV_420SP_UV: + case IV_YUV_420SP_VU: + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride; + break; + case IV_YUV_422ILE: + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = 0; + break; + default: + break; + } + + + } + if(ps_dec_op->s_ivd_video_decode_op_t.u4_output_present) + { + if(1 == ps_dec_op->s_ivd_video_decode_op_t.u4_output_present) + { + INSERT_LOGO(ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0], + ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1], + ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2], + ps_dec_state->u4_frm_buf_stride, + ps_dec_state->u2_horizontal_size, + ps_dec_state->u2_vertical_size, + ps_dec_state->i4_chromaFormat, + ps_dec_state->u2_horizontal_size, + ps_dec_state->u2_vertical_size); + } + return(IV_SUCCESS); + } + else + { + ps_dec_state->u1_flushfrm = 0; + + return(IV_FAIL); + } + + } + else if(ps_dec_state->u1_flushfrm==0) + { + ps_dec_ip->s_ivd_video_decode_ip_t.u4_size = sizeof(impeg2d_video_decode_ip_t); + ps_dec_op->s_ivd_video_decode_op_t.u4_size = sizeof(impeg2d_video_decode_op_t); + if(ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes < 4) + { + 
ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + return(IV_FAIL); + } + + if(1 == ps_dec_state->u4_share_disp_buf) + { + if(0 == impeg2_buf_mgr_check_free(ps_dec_state->pv_pic_buf_mg)) + { + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code = + (IMPEG2D_ERROR_CODES_T)IVD_DEC_REF_BUF_NULL; + return IV_FAIL; + } + } + + + ps_dec_op->s_ivd_video_decode_op_t.e_output_format = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat; + + ps_dec_op->s_ivd_video_decode_op_t.u4_is_ref_flag = (B_PIC != ps_dec_state->e_pic_type); + + ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag = IV_PROGRESSIVE; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_wd = ps_dec_state->u2_horizontal_size; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_strd = ps_dec_state->u4_frm_buf_stride; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_ht = ps_dec_state->u2_vertical_size; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = ps_dec_state->u2_vertical_size >> 1; + + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = ps_dec_state->u2_horizontal_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_strd = ps_dec_state->u4_frm_buf_stride >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = ps_dec_state->u2_vertical_size >> 1; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_size = sizeof(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf); + + switch(ps_dec_state->i4_chromaFormat) + { + case IV_YUV_420SP_UV: + case IV_YUV_420SP_VU: + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride; + break; + case IV_YUV_422ILE: + 
ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = 0; + ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = 0; + break; + default: + break; + } + + if( ps_dec_state->u1_flushfrm == 0) + { + ps_dec_state->u1_flushcnt = 0; + + /*************************************************************************/ + /* Frame Decode */ + /*************************************************************************/ + + impeg2d_dec_frm(ps_dec_state,ps_dec_ip,ps_dec_op); + + if (IVD_ERROR_NONE == + ps_dec_op->s_ivd_video_decode_op_t.u4_error_code) + { + if(ps_dec_state->ps_disp_pic) + ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 1; + else + ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0; + if(ps_dec_state->u1_first_frame_done == 0) + { + ps_dec_state->u1_first_frame_done = 1; + } + + switch(ps_dec_state->e_pic_type) + { + case I_PIC : + ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME; + break; + + case P_PIC: + ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_P_FRAME; + break; + + case B_PIC: + ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_B_FRAME; + break; + + case D_PIC: + ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME; + break; + + default : + ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_FRAMETYPE_DEFAULT; + break; + } + + ps_dec_state->u4_num_frames_decoded++; + } + } + else + { + ps_dec_state->u1_flushcnt++; + } + } + if(ps_dec_state->ps_disp_pic) + { + ps_dec_op->s_ivd_video_decode_op_t.u4_disp_buf_id = ps_dec_state->ps_disp_pic->i4_buf_id; + ps_dec_op->s_ivd_video_decode_op_t.u4_ts = ps_dec_state->ps_disp_pic->u4_ts; + + if(0 == ps_dec_state->u4_share_disp_buf) + { + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->ps_disp_pic->i4_buf_id, BUF_MGR_DISP); + } + } + + if(1 == ps_dec_op->s_ivd_video_decode_op_t.u4_output_present) + { + 
INSERT_LOGO(ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0], + ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1], + ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2], + ps_dec_state->u4_frm_buf_stride, + ps_dec_state->u2_horizontal_size, + ps_dec_state->u2_vertical_size, + ps_dec_state->i4_chromaFormat, + ps_dec_state->u2_horizontal_size, + ps_dec_state->u2_vertical_size); + } + + } + + ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag = 1; + ps_dec_op->s_ivd_video_decode_op_t.e4_fld_type = ps_dec_state->s_disp_op.e4_fld_type; + + + if(ps_dec_op->s_ivd_video_decode_op_t.u4_error_code) + return IV_FAIL; + else + return IV_SUCCESS; +} diff --git a/decoder/impeg2d_bitstream.c b/decoder/impeg2d_bitstream.c new file mode 100644 index 0000000..92d3785 --- /dev/null +++ b/decoder/impeg2d_bitstream.c @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : impeg2d_bitstream.c */ +/* */ +/* Description : Bitstream reader of the impeg2d decoder. It sets up */ +/* a stream over a byte buffer and provides routines */ +/* to peek, read and skip bits in that stream */ +/* */ +/* List of Functions : impeg2d_bit_stream_init */ +/* impeg2d_bit_stream_get_bit */ +/* impeg2d_bit_stream_flush */ +/* impeg2d_bit_stream_flush_to_byte_boundary */ +/* impeg2d_bit_stream_nxt */ +/* impeg2d_bit_stream_get */ +/* impeg2d_bit_stream_num_bits_read */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 10 01 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ +#include <stdlib.h> + +#include "iv_datatypedef.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_macros.h" +#include "impeg2d_bitstream.h" + +#define BIT(val,bit) (UWORD16)(((val) >> (bit)) & 0x1) +/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_init +* +* Description : This is a Bitstream initialising function. +* Arguments : +* stream : Pointer to the Bitstream. 
+* byteBuf : Address of the buffer +* size : Size of the buffer in bytes +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_bit_stream_init(stream_t *ps_stream, + UWORD8 *pu1_byte_buf, + UWORD32 u4_max_offset) +{ + UWORD8 *pu1_byte_buff; + UWORD32 *pu4_word_buf; + size_t u4_byte_addr; + UWORD32 u4_temp1,u4_temp2; + + /* Set parameters of the stream structure.Associate the structure with + the file */ + ps_stream->pv_bs_buf = pu1_byte_buf; + ps_stream->u4_offset = 0; + + /* Take care of unaligned address and create + nearest greater aligned address */ + pu1_byte_buff = (UWORD8 *)pu1_byte_buf; + u4_byte_addr = (size_t)pu1_byte_buff; + + if((u4_byte_addr & 3) == 1) + { + u4_temp1 = ((UWORD32)(*pu1_byte_buff++)) << 8; + u4_temp1 += ((UWORD32)(*pu1_byte_buff++)) << 16; + u4_temp1 += ((UWORD32)(*pu1_byte_buff++)) << 24; + + pu4_word_buf = (UWORD32 *)pu1_byte_buff; + + ps_stream->u4_offset = 8; + } + else if((u4_byte_addr & 3) == 2) + { + u4_temp1 = ((UWORD32)(*pu1_byte_buff++)) << 16; + u4_temp1 += ((UWORD32)(*pu1_byte_buff++)) << 24; + + pu4_word_buf = (UWORD32 *)pu1_byte_buff; + + ps_stream->u4_offset = 16; + } + else if((u4_byte_addr & 3) == 3) + { + u4_temp1 = (((UWORD32)(*pu1_byte_buff++)) << 24); + + pu4_word_buf = (UWORD32 *)pu1_byte_buff; + + ps_stream->u4_offset = 24; + } + else + { + pu4_word_buf = (UWORD32 *)pu1_byte_buff; + + u4_temp1 = *pu4_word_buf++; + ps_stream->u4_offset = 0; + } + + /* convert the endian ness from Little endian to Big endian so that bits + are in proper order from MSB to LSB */ + CONV_LE_TO_BE(u4_temp2,u4_temp1) + + /* Read One more word for buf nxt */ + u4_temp1 = *pu4_word_buf++; + ps_stream->u4_buf = u4_temp2; + + CONV_LE_TO_BE(u4_temp2,u4_temp1) + + ps_stream->u4_buf_nxt = u4_temp2; + + ps_stream->pu4_buf_aligned = pu4_word_buf; + + + ps_stream->u4_max_offset = (u4_max_offset << 3) + ps_stream->u4_offset; + + return; +} + + + 
+/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_get_bit +* +* Description : This is a Bitstream processing function. It reads the +* bit currently pointed by the bit pointer in the buffer and +* advances the pointer by one. +* Arguments : +* stream : Pointer to the Bitstream. +* +* Values Returned : The bit read(0/1) +*******************************************************************************/ +INLINE UWORD8 impeg2d_bit_stream_get_bit(stream_t *ps_stream) +{ + UWORD32 u4_bit,u4_offset,u4_temp; + UWORD32 u4_curr_bit; + + u4_offset = ps_stream->u4_offset; + u4_curr_bit = u4_offset & 0x1F; + u4_bit = ps_stream->u4_buf; + + /* Move the current bit read from the current word to the + least significant bit positions of 'c'.*/ + u4_bit >>= BITS_IN_INT - u4_curr_bit - 1; + + u4_offset++; + + /* If the last bit of the last word of the buffer has been read update + the currrent buf with next, and read next buf from bit stream buffer */ + if (u4_curr_bit == 31) + { + ps_stream->u4_buf = ps_stream->u4_buf_nxt; + u4_temp = *(ps_stream->pu4_buf_aligned)++; + + CONV_LE_TO_BE(ps_stream->u4_buf_nxt,u4_temp) + } + ps_stream->u4_offset = u4_offset; + + return (u4_bit & 0x1); +} +/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_flush +* +* Description : This is a Bitstream processing function. It +* advances the bit and byte pointers appropriately +* +* Arguments : +* ctxt : Pointer to the Bitstream. 
+* numBits : No of bits to be read +* +* Values Returned : None +*******************************************************************************/ +INLINE void impeg2d_bit_stream_flush(void* pv_ctxt, UWORD32 u4_no_of_bits) +{ + stream_t *ps_stream = (stream_t *)pv_ctxt; + + FLUSH_BITS(ps_stream->u4_offset,ps_stream->u4_buf,ps_stream->u4_buf_nxt,u4_no_of_bits,ps_stream->pu4_buf_aligned) + return; +} +/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_flush_to_byte_boundary +* +* Description : This is a Bitstream processing function.It advances +* the bit and byte pointers to next byte boundary +* +* Arguments : +* stream : Pointer to the Bitstream. +* NoOfBits : No of bits to be read +* +* Values Returned : The bits read (upto 32 bits maximum) starting from the +* least significant bit and going towards most significant +* bit in the order of their occurence. +*******************************************************************************/ +INLINE void impeg2d_bit_stream_flush_to_byte_boundary(void* pv_ctxt) +{ + UWORD8 u1_bit_offset; + stream_t *ps_stream = (stream_t *)pv_ctxt; + + u1_bit_offset = (ps_stream->u4_offset) & 0x7; + + + /* if it is not byte aligned make it byte aligned*/ + if(u1_bit_offset != 0) + { + impeg2d_bit_stream_flush(ps_stream,(8 - u1_bit_offset)); + } + + + +} + + +/****************************************************************************** +* +* Function Name : ibits_next +* +* Description : This is a Bitstream processing function.It gets the +* specified number of bits from the buffer without +* altering the current pointers. It is used mainly to +* check for some specific pattern of bits like start +* code. This is equivalent to next_bits() function +* defined in MPEG-4 Visual Standard Definition of functions +* +* Arguments : +* ctxt : Pointer to the Bitstream. 
+* numBits : No of bits to be read +* +* Values Returned : The bits read (upto 32 bits maximum) starting from the +* least significant bit and going towards most significant +* bit in the order of their occurence. +*******************************************************************************/ +INLINE UWORD32 impeg2d_bit_stream_nxt( stream_t *ps_stream, WORD32 i4_no_of_bits) +{ + UWORD32 u4_bits,u4_offset,u4_temp; + UWORD8 u4_bit_ptr; + + ASSERT(i4_no_of_bits > 0); + + u4_offset = ps_stream->u4_offset; + u4_bit_ptr = u4_offset & 0x1F; + u4_bits = ps_stream->u4_buf << u4_bit_ptr; + + u4_bit_ptr += i4_no_of_bits; + if(32 < u4_bit_ptr) + { + /* Read bits from the next word if necessary */ + u4_temp = ps_stream->u4_buf_nxt; + u4_bit_ptr &= (BITS_IN_INT - 1); + + u4_temp = (u4_temp >> (BITS_IN_INT - u4_bit_ptr)); + + /* u4_temp consists of bits,if any that had to be read from the next word + of the buffer.The bits read from both the words are concatenated and + moved to the least significant positions of 'u4_bits'*/ + u4_bits = (u4_bits >> (32 - i4_no_of_bits)) | u4_temp; + } + else + { + u4_bits = (u4_bits >> (32 - i4_no_of_bits)); + } + + return (u4_bits); +} +/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_get +* +* Description : This is a Bitstream processing function. It reads a +* specified number of bits from the current bit +* position and advances the bit and byte pointers +* appropriately +* Arguments : +* ctxt : Pointer to the Bitstream. +* numBits : No of bits to be read +* +* Values Returned : The bits read (upto 32 bits maximum) starting from the +* least significant bit and going towards most significant +* bit in the order of their occurence. 
+*******************************************************************************/ + +INLINE UWORD32 impeg2d_bit_stream_get(void* pv_ctxt, UWORD32 u4_num_bits) +{ + UWORD32 u4_next_bits = impeg2d_bit_stream_nxt(pv_ctxt, u4_num_bits); + impeg2d_bit_stream_flush(pv_ctxt, u4_num_bits); + return(u4_next_bits); +} + + + +/****************************************************************************** +* +* Function Name : impeg2d_bit_stream_num_bits_read +* +* Description : This is a Bitstream processing function. It reads a +* specified number of bits from the current bit +* position and advances the bit and byte pointers +* appropriately +* Arguments : +* ctxt : Pointer to the Bitstream. +* numBits : No of bits to be read +* +* Values Returned : The bits read (upto 16 bits maximum) starting from the +* least significant bit and going towards most significant +* bit in the order of their occurence. +*******************************************************************************/ +INLINE UWORD32 impeg2d_bit_stream_num_bits_read(void* pv_ctxt) +{ + stream_t *u4_no_of_bitsstream = (stream_t *)pv_ctxt; + size_t u4_temp; + UWORD32 u4_bits_read; + u4_temp = (size_t)(u4_no_of_bitsstream->pv_bs_buf); + u4_temp &= 0x3; + u4_bits_read = (u4_no_of_bitsstream->u4_offset - (u4_temp << 3)); + + return(u4_bits_read); + +} + + diff --git a/decoder/impeg2d_bitstream.h b/decoder/impeg2d_bitstream.h new file mode 100644 index 0000000..4ce4013 --- /dev/null +++ b/decoder/impeg2d_bitstream.h @@ -0,0 +1,156 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*****************************************************************************/
/*                                                                           */
/*  File Name         : impeg2d_bitstream.h                                  */
/*                                                                           */
/*  Description       : Bitstream reader state (stream_t), macros that       */
/*                      inline the peek/get/flush operations on local        */
/*                      copies of that state, and prototypes of the          */
/*                      bitstream access functions.                          */
/*                                                                           */
/*  List of Functions : impeg2d_bit_stream_init, _get_bit, _flush,           */
/*                      _flush_to_byte_boundary, _nxt, _get, _num_bits_read  */
/*                                                                           */
/*  Issues / Problems : None                                                 */
/*                                                                           */
/*  Revision History  :                                                      */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         10 01 2005   Ittiam          Draft                                */
/*                                                                           */
/*****************************************************************************/
#ifndef __IMPEG2D_BITSTREAM_H__
#define __IMPEG2D_BITSTREAM_H__



/* Structure for the stream */
typedef struct _stream_t
{
    void    *pv_bs_buf;             /* Pointer to buffer containing the
                                        bitstream                    */

    UWORD32  *pu4_buf_aligned;      /* Pointer to the buffer after alignment correction,
                                         It points to the currently usable buffer */

    UWORD32  u4_offset;             /* Offset in the buffer for the current bit */

    UWORD32  u4_buf;                /* Buffer storing the current word */

    UWORD32  u4_buf_nxt;            /* Buffer storing the next Word */

    /* NOTE(review): described as a byte offset, but the decoder compares it */
    /* directly against u4_offset, which is documented as a bit offset -     */
    /* confirm the unit in impeg2d_bit_stream_init.                          */
    UWORD32  u4_max_offset;         /* Max Bit stream buffer offset in bytes for error checks */
} stream_t;

/* Consumes one marker bit. A wrong marker value is tolerated on purpose;    */
/* the 'dec' argument is not used by the expansion.                          */
#define GET_MARKER_BIT(dec,stream)                                             \
{                                                                              \
    if (impeg2d_bit_stream_get(stream,1) != 0x1) {                             \
    /* No need to return error if marker is not present. */                    \
    }                                                                          \
}

/* Define A macro for inlining of FlushBits */
/* Advances u4_offset by u4_no_bits. When the consumed bits reach the end of */
/* the current 32-bit word, u4_buf_nxt becomes u4_buf and a fresh word is    */
/* loaded through CONV_LE_TO_BE (defined elsewhere) while pu4_buf_aligned    */
/* is post-incremented. All arguments other than u4_no_bits must be          */
/* lvalues; u4_offset and u4_no_bits are evaluated more than once.           */
#define FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_no_bits,pu4_buf_aligned)      \
{                                                                              \
        UWORD32     u4_temp;                                                   \
                                                                               \
        if (((u4_offset & 0x1f) + u4_no_bits)>= 32)                            \
        {                                                                      \
            u4_buf              = u4_buf_nxt;                                  \
                                                                               \
            u4_temp             = *(pu4_buf_aligned)++;                        \
                                                                               \
            CONV_LE_TO_BE(u4_buf_nxt,u4_temp)                                  \
        }                                                                      \
        u4_offset               += u4_no_bits;                                 \
}

/* Macro to initialize the variables from stream */
/* Copies the reader state out of 'stream' (a stream_t pointer) into locals  */
/* so the inline macros below can work on them.                              */
#define GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,stream)   \
{                                                                                  \
    u4_buf = stream->u4_buf;                                                       \
    u4_buf_nxt = stream->u4_buf_nxt;                                               \
    u4_offset = stream->u4_offset;                                                 \
    pu4_buf_aligned = stream->pu4_buf_aligned;                                     \
}

/* Macro to put the stream variable values back */
/* Inverse of GET_TEMP_STREAM_DATA: stores the locals back into 'stream'.    */
#define PUT_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,stream)   \
{                                                                                  \
    stream->u4_buf = u4_buf;                                                       \
    stream->u4_buf_nxt = u4_buf_nxt;                                               \
    stream->u4_offset = u4_offset;                                                 \
    stream->pu4_buf_aligned = pu4_buf_aligned;                                     \
}

/* Macro to implement the get bits inline (ibits_nxt_inline) */
/* Same computation as impeg2d_bit_stream_nxt, on local copies of the state: */
/* u4_bits receives the next no_of_bits bits, right-aligned, and nothing is  */
/* consumed. no_of_bits should be 1..32 (0 would shift a 32-bit value by 32).*/
#define IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, no_of_bits)              \
{                                                                                  \
    UWORD8 u4_bit_ptr;                                                             \
    UWORD32 u4_temp;                                                               \
                                                                                   \
    u4_bit_ptr  = u4_offset & 0x1F;                                                \
    u4_bits     = u4_buf << u4_bit_ptr;                                            \
                                                                                   \
    u4_bit_ptr  += no_of_bits;                                                     \
                                                                                   \
    if(32 < u4_bit_ptr)                                                            \
    {                                                                              \
        /*  Read bits from the next word if necessary */                           \
        u4_temp     = u4_buf_nxt;                                                  \
        u4_bit_ptr  &= (BITS_IN_INT - 1);                                          \
                                                                                   \
        u4_temp     = (u4_temp >> (BITS_IN_INT - u4_bit_ptr));                     \
                                                                                   \
        /* u4_temp consists of bits,if any that had to be read from the next word*/ \
        /* of the buffer.The bits read from both the words are concatenated and*/  \
        /* moved to the least significant positions of 'u4_bits'*/                 \
        u4_bits = (u4_bits >> (32 - no_of_bits)) | u4_temp;                        \
    }                                                                              \
    else                                                                           \
    {                                                                              \
        u4_bits = (u4_bits >> (32 - no_of_bits));                                  \
    }                                                                              \
}

/* Macro to implement the get bits inline (ibits_get_inline) */
/* Peek (IBITS_NXT) followed by consume (FLUSH_BITS) on the local state.     */
#define IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,no_of_bits)   \
{                                                                                   \
    IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, no_of_bits)                   \
    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,no_of_bits,pu4_buf_aligned)              \
}

/* Function-call versions of the operations above; the void* context         */
/* arguments are stream_t pointers.                                          */
void impeg2d_bit_stream_init(stream_t *stream,
                             UWORD8 *byteBuf,
                             UWORD32 u4_max_offset);
INLINE UWORD8 impeg2d_bit_stream_get_bit(stream_t *stream);
INLINE void impeg2d_bit_stream_flush(void* ctxt, UWORD32 NoOfBits);
INLINE void impeg2d_bit_stream_flush_to_byte_boundary(void* ctxt);
INLINE UWORD32 impeg2d_bit_stream_nxt(stream_t *stream, WORD32 NoOfBits);

INLINE UWORD32 impeg2d_bit_stream_get(void* ctxt, UWORD32 numBits);
INLINE UWORD32 impeg2d_bit_stream_num_bits_read(void* ctxt);







#endif /* __IMPEG2D_BITSTREAM_H__ */
diff --git a/decoder/impeg2d_d_pic.c b/decoder/impeg2d_d_pic.c
new file mode 100644
index 0000000..a90e16d
--- /dev/null
+++ b/decoder/impeg2d_d_pic.c
@@ -0,0 +1,251 @@
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ + +#include "iv_datatypedef.h" +#include "impeg2_defs.h" +#include "impeg2_globals.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_buf_mgr.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld.h" +#include "impeg2d_vld_tables.h" + +#define BLK_SIZE 8 +#define LUMA_BLK_SIZE (2 * (BLK_SIZE)) +#define CHROMA_BLK_SIZE (BLK_SIZE) +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_get_luma_dc_diff */ +/* */ +/* Description : Decode the DC differential value from the bitstream for */ +/* luma block */ +/* */ +/* Inputs : stream - Input stream */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Decode the vlc for dc_diff */ +/* */ +/* Outputs : dc_diff - dc differential used in dc prediction */ +/* */ +/* Returns : dc_diff - dc differential used in dc prediction */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +WORD16 impeg2d_get_luma_dc_diff(stream_t *ps_stream) +{ + UWORD16 u2_dc_size; + WORD16 i2_dc_diff; + + u2_dc_size = impeg2d_dec_vld_symbol(ps_stream,gai2_impeg2d_dct_dc_size[0], + MPEG2_DCT_DC_LUMA_SIZE_LEN) + + MPEG2_DCT_DC_SIZE_OFFSET; + if (u2_dc_size != 0) + { + i2_dc_diff = impeg2d_bit_stream_get(ps_stream,u2_dc_size); + if ((i2_dc_diff & (1 << (u2_dc_size - 1))) == 0) + i2_dc_diff -= (1 << u2_dc_size) - 1; + } + else + { + i2_dc_diff = 0; + } + return i2_dc_diff; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_get_chroma_dc_diff */ +/* */ +/* Description : Decode the DC differential value from the 
 bitstream for */
/*                  chroma block                                             */
/*                                                                           */
/*  Inputs        : stream - Input stream                                    */
/*                                                                           */
/*  Globals       : gai2_impeg2d_dct_dc_size[1] (chroma dc size VLC table)   */
/*                                                                           */
/*  Processing    : Decode the vlc for dc_diff: the size symbol plus         */
/*                  MPEG2_DCT_DC_SIZE_OFFSET gives the number of bits to     */
/*                  read; a value whose top bit is clear is negative and     */
/*                  gets (2^size - 1) subtracted.                            */
/*                                                                           */
/*  Outputs       : dc_diff - dc differential used in dc prediction          */
/*                                                                           */
/*  Returns       : dc_diff - dc differential used in dc prediction          */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes                              */
/*         14 09 2005   Harish M        First Version                        */
/*                                                                           */
/*****************************************************************************/
WORD16  impeg2d_get_chroma_dc_diff(stream_t *ps_stream)
{
    UWORD16 u2_dc_size;
    WORD16  i2_dc_diff;
    u2_dc_size = impeg2d_dec_vld_symbol(ps_stream,gai2_impeg2d_dct_dc_size[1],
                        MPEG2_DCT_DC_CHROMA_SIZE_LEN) +
                        MPEG2_DCT_DC_SIZE_OFFSET;
    if (u2_dc_size != 0)
    {
        i2_dc_diff = impeg2d_bit_stream_get(ps_stream,u2_dc_size);
        /* Top bit clear => negative differential */
        if ((i2_dc_diff & (1 << (u2_dc_size - 1))) == 0)
            i2_dc_diff -= (1 << u2_dc_size) - 1;
    }
    else
    {
        i2_dc_diff = 0;
    }
    return i2_dc_diff;
}
/*******************************************************************************
*  Function Name   : impeg2d_dec_d_slice
*
*  Description     : Decodes one slice of a D picture. Every 8x8 block is
*                    reconstructed from its DC value alone: the DC
*                    differential is added to the running predictor, clipped
*                    to 8 bits and the block is filled with that value.
*
*  Arguments       :
*  dec             : Decoder state
*
*  Values Returned : IVD_ERROR_NONE on success,
*                    IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR when the bit offset
*                    has moved past u4_max_offset.
*******************************************************************************/
IMPEG2D_ERROR_CODES_T impeg2d_dec_d_slice(dec_state_t *ps_dec)
{
    UWORD32 i;
    yuv_buf_t *ps_cur_frm_buf       = &ps_dec->s_cur_frm_buf;

    stream_t   *ps_stream       = &ps_dec->s_bit_stream;
    UWORD8   *pu1_vld_buf;

    WORD16 i2_dc_diff;
    UWORD32 u4_frame_width = ps_dec->u2_frame_width;
    UWORD32 u4_frm_offset = 0;
    /* Field pictures: rows of one field are two frame rows apart, and the   */
    /* bottom field starts one frame row into the buffer                     */
    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
    {
        u4_frame_width <<= 1;
        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
        {
            u4_frm_offset = ps_dec->u2_frame_width;
        }
    }

    do
    {

        UWORD32 u4_x_offset, u4_y_offset;
        UWORD32 u4_blk_pos;
        WORD16 i2_dc_val;

        /* Top-left luma sample of the current 16x16 macroblock */
        UWORD32 u4_dst_x_offset     = u4_frm_offset + (ps_dec->u2_mb_x << 4);
        UWORD32 u4_dst_y_offset     = (ps_dec->u2_mb_y << 4) * u4_frame_width;
        UWORD8 *pu1_vld_buf8        = ps_cur_frm_buf->pu1_y + u4_dst_x_offset + u4_dst_y_offset;
        UWORD32 u4_dst_wd           = u4_frame_width;
        /*------------------------------------------------------------------*/
        /* Discard the Macroblock stuffing in case of MPEG-1 stream         */
        /*------------------------------------------------------------------*/
        while(impeg2d_bit_stream_nxt(ps_stream,MB_STUFFING_CODE_LEN) == MB_STUFFING_CODE)
            impeg2d_bit_stream_flush(ps_stream,MB_STUFFING_CODE_LEN);

        /*------------------------------------------------------------------*/
        /* Flush 2 bits from bitstream [MB_Type and MacroBlockAddrIncrement]*/
        /*------------------------------------------------------------------*/
        impeg2d_bit_stream_flush(ps_stream,1);

        /* Second of the two bits; its value is deliberately not enforced */
        if(impeg2d_bit_stream_get(ps_stream, 1) != 0x01)
        {
            /* Ignore and continue decoding. */
        }

        /* Process LUMA blocks of the MB */
        for(i = 0; i < NUM_LUMA_BLKS; ++i)
        {

            u4_x_offset    = gai2_impeg2_blk_x_off[i];
            u4_y_offset    = gai2_impeg2_blk_y_off_frm[i] ;
            u4_blk_pos     = (u4_y_offset * u4_dst_wd) + u4_x_offset;
            pu1_vld_buf     = pu1_vld_buf8 + u4_blk_pos;

            /* The predictor keeps the unclipped sum; only the pixel value   */
            /* written to the frame is clipped                               */
            i2_dc_diff = impeg2d_get_luma_dc_diff(ps_stream);
            i2_dc_val = ps_dec->u2_def_dc_pred[Y_LUMA] + i2_dc_diff;
            ps_dec->u2_def_dc_pred[Y_LUMA] = i2_dc_val;
            i2_dc_val = CLIP_U8(i2_dc_val);

            ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);
        }



        /* Process U block of the MB */

        /* Chroma planes are half the luma size in both directions: x and    */
        /* stride halve, the linear row offset shrinks by four               */
        u4_dst_x_offset                >>= 1;
        u4_dst_y_offset                >>= 2;
        u4_dst_wd                      >>= 1;
        pu1_vld_buf                     = ps_cur_frm_buf->pu1_u + u4_dst_x_offset + u4_dst_y_offset;
        i2_dc_diff                     = impeg2d_get_chroma_dc_diff(ps_stream);
        i2_dc_val                      = ps_dec->u2_def_dc_pred[U_CHROMA] + i2_dc_diff;
        ps_dec->u2_def_dc_pred[U_CHROMA]    = i2_dc_val;
        i2_dc_val = CLIP_U8(i2_dc_val);
        ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);


        /* Process V block of the MB */

        pu1_vld_buf                     = ps_cur_frm_buf->pu1_v + u4_dst_x_offset + u4_dst_y_offset;
        i2_dc_diff                     = impeg2d_get_chroma_dc_diff(ps_stream);
        i2_dc_val                      = ps_dec->u2_def_dc_pred[V_CHROMA] + i2_dc_diff;
        ps_dec->u2_def_dc_pred[V_CHROMA]    = i2_dc_val;
        i2_dc_val = CLIP_U8(i2_dc_val);
        ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);

        /* Common MB processing Steps */


        ps_dec->u2_num_mbs_left--;
        ps_dec->u2_mb_x++;

        /* NOTE(review): the overrun check runs only after the macroblock    */
        /* has been written, and u2_mb_y is not range-checked here - confirm */
        /* that callers bound the macroblock position.                       */
        if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)
        {
            return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
        }
        else if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
        {
            /* End of a macroblock row with data still left: wrap around */
            ps_dec->u2_mb_x = 0;
            ps_dec->u2_mb_y++;

        }

        /* Flush end of macro block */
        impeg2d_bit_stream_flush(ps_stream,1);
    }
    /* Continue until all MBs are done or the next 23 bits are all zero */
    while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0);
    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
}/* End of impeg2d_dec_d_slice() */
diff --git a/decoder/impeg2d_debug.c b/decoder/impeg2d_debug.c
new file mode 100644
index 0000000..ff33290
--- /dev/null
+++ b/decoder/impeg2d_debug.c
@@ -0,0 +1,509 @@
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include <string.h> +#include <stdio.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "impeg2d.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_macros.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" +#include "impeg2_globals.h" + +#include "impeg2d_bitstream.h" +#include "impeg2d_api.h" +#include "impeg2d_structs.h" +#include "impeg2d_debug.h" + +#if STATISTICS +WORD32 gai4_impeg2d_idct_inp_last_nonzero_histogram[64] = {0}; +WORD32 gai4_impeg2d_idct_inp_num_nonzero_histogram[64] = {0}; +WORD32 gai4_impeg2d_idct_inp_last_non_zero_row_histogram[8] = {0}; + +WORD32 gai4_impeg2d_iqnt_inp_last_nonzero_histogram[64] = {0}; +WORD32 gai4_impeg2d_iqnt_inp_num_nonzero_histogram[64] = {0}; +WORD32 gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[8] = {0}; + +WORD32 gi4_impeg2d_idct_inp_only_first_coeff = 0; +WORD32 gi4_impeg2d_idct_inp_only_last_coeff = 0; +WORD32 gi4_impeg2d_idct_inp_only_first_n_last_coeff = 0; +WORD32 gi4_impeg2d_idct_cnt = 0; + + +WORD32 gi4_impeg2d_iqnt_inp_only_first_coeff = 0; +WORD32 gi4_impeg2d_iqnt_inp_only_last_coeff = 0; +WORD32 gi4_impeg2d_iqnt_inp_only_first_n_last_coeff = 0; +WORD32 gi4_impeg2d_iqnt_cnt = 0; + + +void impeg2d_iqnt_inp_statistics(WORD16 *pi2_iqnt_inp, + WORD32 i4_non_zero_cols, + WORD32 i4_non_zero_rows) +{ + WORD32 i, j; + WORD32 i4_last_row = 0, i4_last_col = 0; + WORD32 i4_num_non_zero = 0; + WORD32 i4_non_zero_cols_computed = 0; + WORD32 i4_non_zero_rows_computed = 0; + + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + if(pi2_iqnt_inp[i * 8 + j]) + { + i4_non_zero_cols_computed |= (1 << j); + i4_non_zero_rows_computed |= (1 << i); + } + } + } + + 
if(i4_non_zero_cols_computed != i4_non_zero_cols) + { + printf("IQ Input: Invalid non_zero_cols 0x%x non_zero_cols_computed 0x%x\n", i4_non_zero_cols, i4_non_zero_cols_computed); + } + if(i4_non_zero_rows_computed != i4_non_zero_rows) + { + printf("IQ Input: Invalid non_zero_rows 0x%x non_zero_rows_computed 0x%x\n", i4_non_zero_rows, i4_non_zero_rows_computed); + } + { + WORD32 last_non_zero_row = 32 - CLZ(i4_non_zero_rows); + gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[last_non_zero_row - 1]++; + } + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + if(pi2_iqnt_inp[i * 8 + j]) + { + i4_last_col = MAX(i4_last_col, j); + i4_last_row = MAX(i4_last_row, i); + i4_num_non_zero++; + } + } + } + gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i4_last_row * 8 + i4_last_col]++; + gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i4_num_non_zero]++; + gi4_impeg2d_iqnt_cnt++; + /* Check if only (0,0) and (7,7) are non zero */ + if(i4_num_non_zero == 1) + { + if(pi2_iqnt_inp[7 * 8 + 7]) + gi4_impeg2d_iqnt_inp_only_last_coeff++; + } + if(i4_num_non_zero == 1) + { + if(pi2_iqnt_inp[0]) + gi4_impeg2d_iqnt_inp_only_first_coeff++; + } + + if(i4_num_non_zero == 2) + { + if((pi2_iqnt_inp[0]) && (1 == pi2_iqnt_inp[7 * 8 + 7])) + gi4_impeg2d_iqnt_inp_only_first_n_last_coeff++; + } +} + +void impeg2d_idct_inp_statistics(WORD16 *pi2_idct_inp, + WORD32 i4_non_zero_cols, + WORD32 i4_non_zero_rows) +{ + WORD32 i, j; + WORD32 i4_last_row = 0, i4_last_col = 0; + WORD32 i4_num_non_zero = 0; + WORD32 i4_non_zero_cols_computed = 0; + WORD32 i4_non_zero_rows_computed = 0; + + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + if(pi2_idct_inp[i * 8 + j]) + { + i4_non_zero_cols_computed |= (1 << j); + i4_non_zero_rows_computed |= (1 << i); + } + } + } + + if(i4_non_zero_cols_computed != i4_non_zero_cols) + { + printf("IDCT Input: Invalid non_zero_cols 0x%x non_zero_cols_computed 0x%x\n", i4_non_zero_cols, i4_non_zero_cols_computed); + } + if(i4_non_zero_rows_computed != 
i4_non_zero_rows) + { + printf("IDCT Input: Invalid non_zero_rows 0x%x non_zero_rows_computed 0x%x\n", i4_non_zero_rows, i4_non_zero_rows_computed); + } + + { + WORD32 last_non_zero_row = 32 - CLZ(i4_non_zero_rows); + gai4_impeg2d_idct_inp_last_non_zero_row_histogram[last_non_zero_row - 1]++; + } + + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + if(pi2_idct_inp[i * 8 + j]) + { + i4_last_col = MAX(i4_last_col, j); + i4_last_row = MAX(i4_last_row, i); + i4_num_non_zero++; + } + } + } + gai4_impeg2d_idct_inp_last_nonzero_histogram[i4_last_row * 8 + i4_last_col]++; + gai4_impeg2d_idct_inp_num_nonzero_histogram[i4_num_non_zero]++; + gi4_impeg2d_idct_cnt++; + /* Check if only (0,0) and (7,7) are non zero */ + if(i4_num_non_zero == 1) + { + if(pi2_idct_inp[7 * 8 + 7]) + gi4_impeg2d_idct_inp_only_last_coeff++; + } + if(i4_num_non_zero == 1) + { + if(pi2_idct_inp[0]) + gi4_impeg2d_idct_inp_only_first_coeff++; + } + + if(i4_num_non_zero == 2) + { + if((pi2_idct_inp[0]) && (1 == pi2_idct_inp[7 * 8 + 7])) + gi4_impeg2d_idct_inp_only_first_n_last_coeff++; + } +} +void impeg2d_print_idct_inp_statistics() +{ + WORD32 i, j; + WORD32 i4_sum; + WORD32 i4_accumulator; + i4_sum = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + i4_sum += gai4_impeg2d_idct_inp_last_nonzero_histogram[i * 8 + j]; + } + } + printf("IDCT input : Only last coeff non-zero %8.2f\n", (gi4_impeg2d_idct_inp_only_last_coeff * 100.0) / gi4_impeg2d_idct_cnt); + printf("IDCT input : Only first coeff non-zero (Includes DC + mismatch) %8.2f\n", (gi4_impeg2d_idct_inp_only_first_coeff * 100.0) / gi4_impeg2d_idct_cnt); + + printf("IDCT input : Last non-zero coeff histogram\n"); + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val = gai4_impeg2d_idct_inp_last_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IDCT input : Cumulative Last non-zero coeff histogram\n"); + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + 
for(j = 0; j < 8; j++) + { + double val; + i4_accumulator += gai4_impeg2d_idct_inp_last_nonzero_histogram[i * 8 + j]; + val = i4_accumulator * 100.0 / i4_sum; + + printf("%8.2f \t", val); + + } + printf("\n"); + } + + + + printf("IDCT input : Number of non-zero coeff histogram\n"); + i4_sum = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + i4_sum += gai4_impeg2d_idct_inp_num_nonzero_histogram[i * 8 + j]; + } + } + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val = gai4_impeg2d_idct_inp_num_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IDCT input : Cumulative number of non-zero coeffs histogram\n"); + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val; + i4_accumulator += gai4_impeg2d_idct_inp_num_nonzero_histogram[i * 8 + j]; + val = i4_accumulator * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IDCT input : Last non-zero row histogram\n"); + + + { + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + i4_accumulator += gai4_impeg2d_idct_inp_last_non_zero_row_histogram[i]; + } + for(i = 0; i < 8; i++) + { + double val = gai4_impeg2d_idct_inp_last_non_zero_row_histogram[i] * 100.0 / i4_accumulator; + printf("%8.2f \t", val); + } + printf("\n"); + } + + + + +} + +void impeg2d_print_iqnt_inp_statistics() +{ + WORD32 i, j; + WORD32 i4_sum; + WORD32 i4_accumulator; + i4_sum = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + i4_sum += gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j]; + } + } + printf("IQnt input : Only last coeff non-zero %8.2f\n", (gi4_impeg2d_iqnt_inp_only_last_coeff * 100.0) / gi4_impeg2d_iqnt_cnt); + printf("IQnt input : Only first coeff non-zero (Includes DC + mismatch) %8.2f\n", (gi4_impeg2d_iqnt_inp_only_first_coeff * 100.0) / gi4_impeg2d_idct_cnt); + + printf("IQnt input : Last non-zero coeff histogram\n"); + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; 
j++) + { + double val = gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IQnt input : Cumulative Last non-zero coeff histogram\n"); + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val; + i4_accumulator += gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j]; + val = i4_accumulator * 100.0 / i4_sum; + + printf("%8.2f \t", val); + + } + printf("\n"); + } + + + + printf("IQnt input : Number of non-zero coeff histogram\n"); + i4_sum = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + i4_sum += gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j]; + } + } + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val = gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IQnt input : Cumulative number of non-zero coeffs histogram\n"); + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + for(j = 0; j < 8; j++) + { + double val; + i4_accumulator += gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j]; + val = i4_accumulator * 100.0 / i4_sum; + printf("%8.2f \t", val); + + } + printf("\n"); + } + + printf("IQnt input : Last non-zero row histogram\n"); + + + { + i4_accumulator = 0; + for(i = 0; i < 8; i++) + { + i4_accumulator += gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[i]; + } + for(i = 0; i < 8; i++) + { + double val = gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[i] * 100.0 / i4_accumulator; + printf("%8.2f \t", val); + } + printf("\n"); + } + +} + +void impeg2d_print_statistics() +{ + impeg2d_print_idct_inp_statistics(); + impeg2d_print_iqnt_inp_statistics(); +} + + +#endif + +#if DEBUG_MB + +static UWORD32 u4_debug_frm = 12; +static UWORD32 u4_debug_mb_x = 3; +static UWORD32 u4_debug_mb_y = 0; + +static UWORD32 u4_debug_frm_num = 0; + +/*****************************************************************************/ 
/*                                                                           */
/*  Function Name : impeg2d_trace_mb_start                                   */
/*                                                                           */
/*  Description   : Debug hook for the start of a macroblock. Compares the   */
/*                  current frame number and macroblock position with the    */
/*                  watched values; the matching branch is empty.            */
/*  Inputs        : u4_mb_x, u4_mb_y - position of the macroblock            */
/*  Globals       : u4_debug_frm, u4_debug_mb_x, u4_debug_mb_y               */
/*  Processing    : Reads the frame counter through impeg2d_frm_num_get()    */
/*                  and tests the three values for equality                  */
/*  Outputs       : None                                                     */
/*  Returns       : None                                                     */
/*                                                                           */
/*  Issues        : None                                                     */
/*                                                                           */
/*  Revision History:                                                        */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
/*         13 07 2002   Ittiam          Draft                                */
/*                                                                           */
/*****************************************************************************/
void impeg2d_trace_mb_start(UWORD32 u4_mb_x, UWORD32 u4_mb_y)
{
    UWORD32 u4_frm_num = impeg2d_frm_num_get();

    if(u4_frm_num == u4_debug_frm && u4_mb_x == u4_debug_mb_x &&  u4_mb_y == u4_debug_mb_y)
    {
        /* Intentionally empty (the only statement, a printf(""), is         */
        /* disabled). NOTE(review): presumably a place for a breakpoint or   */
        /* an ad-hoc trace print when the watched macroblock is reached.     */
    }
}

/*****************************************************************************/
/*                                                                           */
/*  Function Name : impeg2d_frm_num_set                                      */
/*                                                                           */
/*  Description   : Advances the frame counter used by the macroblock        */
/*                  tracing code of DEBUG_MB builds. The template text of    */
/*                  the coding standard was left in this header
*/ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Ittiam Draft */ +/* */ +/*****************************************************************************/ +void impeg2d_frm_num_set(void) +{ + u4_debug_frm_num++; +} + + +/*****************************************************************************/ +/* */ +/* Function Name : example_of_a_function */ +/* */ +/* Description : This function illustrates the use of C coding standards. */ +/* switch/case, if, for, block comments have been shown */ +/* here. */ +/* Inputs : <What inputs does the function take?> */ +/* Globals : <Does it use any global variables?> */ +/* Processing : <Describe how the function operates - include algorithm */ +/* description> */ +/* Outputs : <What does the function produce?> */ +/* Returns : <What does the function return?> */ +/* */ +/* Issues : <List any issues or problems with this function> */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 13 07 2002 Ittiam Draft */ +/* */ +/*****************************************************************************/ +UWORD32 impeg2d_frm_num_get(void) +{ + return(u4_debug_frm_num); +} + +#endif diff --git a/decoder/impeg2d_debug.h b/decoder/impeg2d_debug.h new file mode 100644 index 0000000..5780427 --- /dev/null +++ b/decoder/impeg2d_debug.h @@ -0,0 +1,121 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : c_coding_example.h */ +/* */ +/* Description : This file contains all the necessary examples to */ +/* establish a consistent use of Ittiam C coding */ +/* standards (based on Indian Hill C Standards) */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 10 01 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ +#ifndef __IMPEG2D_DEBUG_H__ +#define __IMPEG2D_DEBUG_H__ + + +/*************************************************************************/ +/* DEBUG */ +/*************************************************************************/ +#define DEBUG_MB 0 + + +#if DEBUG_MB +void impeg2d_trace_mb_start(UWORD32 mb_x, UWORD32 mb_y); +void impeg2d_frm_num_set(void); +UWORD32 impeg2d_frm_num_get(void); + +#define IMPEG2D_TRACE_MB_START(mb_x, mb_y) void impeg2d_trace_mb_start(UWORD32 mb_x, UWORD32 mb_y); +#define IMPEG2D_FRM_NUM_SET() void impeg2d_frm_num_set(void); +#define IMPEG2D_FRM_NUM_GET() UWORD32 impeg2d_frm_num_get(void); +#else +#define IMPEG2D_TRACE_MB_START(mb_x, 
mb_y) +#define IMPEG2D_FRM_NUM_SET() +#define IMPEG2D_FRM_NUM_GET() +#endif + + +#define STATISTICS 0 + +#if STATISTICS +void impeg2d_idct_inp_statistics(WORD16 *pi2_idct_inp, WORD32 non_zero_cols, WORD32 non_zero_rows); +void impeg2d_iqnt_inp_statistics(WORD16 *pi2_iqnt_inp, WORD32 non_zero_cols, WORD32 non_zero_rows); +void impeg2d_print_statistics(void); +#define IMPEG2D_IDCT_INP_STATISTICS(pi2_idct_inp, non_zero_cols, non_zero_rows) impeg2d_idct_inp_statistics(pi2_idct_inp, non_zero_cols, non_zero_rows) +#define IMPEG2D_IQNT_INP_STATISTICS(pi2_iqnt_inp, non_zero_cols, non_zero_rows) impeg2d_iqnt_inp_statistics(pi2_iqnt_inp, non_zero_cols, non_zero_rows) +#define IMPEG2D_PRINT_STATISTICS() impeg2d_print_statistics() +#else +#define IMPEG2D_IDCT_INP_STATISTICS(pi2_idct_inp, non_zero_cols, non_zero_rows) +#define IMPEG2D_IQNT_INP_STATISTICS(pi2_iqnt_inp, non_zero_cols, non_zero_rows) +#define IMPEG2D_PRINT_STATISTICS() +#endif + + +#if 0 +#define PROFILE_DIS_SKIP_MB +#define PROFILE_DIS_MC +#define PROFILE_DIS_INVQUANT +#define PROFILE_DIS_IDCT +#define PROFILE_DIS_MEMSET_RESBUF +#endif + + +#ifdef PROFILE_DIS_SKIP_MB +#define PROFILE_DISABLE_SKIP_MB() return; +#else +#define PROFILE_DISABLE_SKIP_MB() +#endif + +#ifdef PROFILE_DIS_MC +#define PROFILE_DISABLE_MC_IF0 if(0) +#define PROFILE_DISABLE_MC_RETURN return; +#else +#define PROFILE_DISABLE_MC_IF0 +#define PROFILE_DISABLE_MC_RETURN +#endif + +#ifdef PROFILE_DIS_INVQUANT +#define PROFILE_DISABLE_INVQUANT_IF0 if(0) +#else +#define PROFILE_DISABLE_INVQUANT_IF0 +#endif + +#ifdef PROFILE_DIS_IDCT +#define PROFILE_DISABLE_IDCT_IF0 if(0) +#else +#define PROFILE_DISABLE_IDCT_IF0 +#endif + +#ifdef PROFILE_DIS_MEMSET_RESBUF +#define PROFILE_DISABLE_MEMSET_RESBUF_IF0 if(0) +#else +#define PROFILE_DISABLE_MEMSET_RESBUF_IF0 +#endif + + +#endif /* __IMPEG2D_DEBUG_H__ */ diff --git a/decoder/impeg2d_dec_hdr.c b/decoder/impeg2d_dec_hdr.c new file mode 100644 index 0000000..15e61fb --- /dev/null +++ b/decoder/impeg2d_dec_hdr.c 
@@ -0,0 +1,1733 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" +#include "impeg2_platform_macros.h" +#include "ithread.h" +#include "impeg2_job_queue.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_api.h" +#include "impeg2d_structs.h" +#include "impeg2_globals.h" +#include "impeg2d_pic_proc.h" + + + +/****************************************************************************** +* Function Name : impeg2d_next_start_code +* +* Description : Peek for next_start_code from the stream_t. 
+* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +void impeg2d_next_start_code(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + impeg2d_bit_stream_flush_to_byte_boundary(ps_stream); + + while ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX) + && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)) + { + impeg2d_bit_stream_get(ps_stream,8); + } + return; +} +/****************************************************************************** +* Function Name : impeg2d_next_code +* +* Description : Peek for next_start_code from the stream_t. +* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +void impeg2d_next_code(dec_state_t *ps_dec, UWORD32 u4_start_code_val) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + impeg2d_bit_stream_flush_to_byte_boundary(ps_stream); + + while ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) != u4_start_code_val) + && (ps_dec->s_bit_stream.u4_offset <= ps_dec->s_bit_stream.u4_max_offset)) + { + + if (impeg2d_bit_stream_get(ps_stream,8) != 0) + { + /* Ignore stuffing bit errors. */ + } + + } + return; +} +/****************************************************************************** +* Function Name : impeg2d_peek_next_start_code +* +* Description : Peek for next_start_code from the stream_t. 
+* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +void impeg2d_peek_next_start_code(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + impeg2d_bit_stream_flush_to_byte_boundary(ps_stream); + + while ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX) + && (ps_dec->s_bit_stream.u4_offset <= ps_dec->s_bit_stream.u4_max_offset)) + { + impeg2d_bit_stream_get(ps_stream,8); + } + return; +} +/****************************************************************************** +* +* Function Name : impeg2d_dec_seq_hdr +* +* Description : Decodes Sequence header information +* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_hdr(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + UWORD16 u2_height; + UWORD16 u2_width; + + if (impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) != SEQUENCE_HEADER_CODE) + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND; + + } + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + + u2_width = impeg2d_bit_stream_get(ps_stream,12); + u2_height = impeg2d_bit_stream_get(ps_stream,12); + + if ((u2_width != ps_dec->u2_horizontal_size) + || (u2_height != ps_dec->u2_vertical_size)) + { + if (0 == ps_dec->u2_header_done) + { + /* This is the first time we are reading the resolution */ + ps_dec->u2_horizontal_size = u2_width; + ps_dec->u2_vertical_size = u2_height; + if (0 == ps_dec->u4_frm_buf_stride) + { + ps_dec->u4_frm_buf_stride = (UWORD32) ALIGN16(u2_width); + } + } + else + { + if((u2_width > ps_dec->u2_create_max_width) + || (u2_height > ps_dec->u2_create_max_height)) + { + IMPEG2D_ERROR_CODES_T e_error = IMPEG2D_UNSUPPORTED_DIMENSIONS; + + 
ps_dec->u2_reinit_max_height = u2_height; + ps_dec->u2_reinit_max_width = u2_width; + + return e_error; + } + else + { + /* The resolution has changed */ + return (IMPEG2D_ERROR_CODES_T)IVD_RES_CHANGED; + } + } + } + + if((ps_dec->u2_horizontal_size > ps_dec->u2_create_max_width) + || (ps_dec->u2_vertical_size > ps_dec->u2_create_max_height)) + { + IMPEG2D_ERROR_CODES_T e_error = IMPEG2D_UNSUPPORTED_DIMENSIONS; + return SET_IVD_FATAL_ERROR(e_error); + } + + + /*------------------------------------------------------------------------*/ + /* Flush the following as they are not being used */ + /* aspect_ratio_info (4 bits) */ + /*------------------------------------------------------------------------*/ + ps_dec->u2_aspect_ratio_info = impeg2d_bit_stream_get(ps_stream,4); + + /*------------------------------------------------------------------------*/ + /* Frame rate code(4 bits) */ + /*------------------------------------------------------------------------*/ + ps_dec->u2_frame_rate_code = impeg2d_bit_stream_get(ps_stream,4); + /*------------------------------------------------------------------------*/ + /* Flush the following as they are not being used */ + /* bit_rate_value (18 bits) */ + /*------------------------------------------------------------------------*/ + impeg2d_bit_stream_flush(ps_stream,18); + GET_MARKER_BIT(ps_dec,ps_stream); + /*------------------------------------------------------------------------*/ + /* Flush the following as they are not being used */ + /* vbv_buffer_size_value(10 bits), constrained_parameter_flag (1 bit) */ + /*------------------------------------------------------------------------*/ + impeg2d_bit_stream_flush(ps_stream,11); + + /*------------------------------------------------------------------------*/ + /* Quantization matrix for the intra blocks */ + /*------------------------------------------------------------------------*/ + if(impeg2d_bit_stream_get_bit(ps_stream) == 1) + { + UWORD16 i; + for(i = 0; i < 
NUM_PELS_IN_BLOCK; i++) + { + ps_dec->au1_intra_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] = (UWORD8)impeg2d_bit_stream_get(ps_stream,8); + } + + } + else + { + memcpy(ps_dec->au1_intra_quant_matrix,gau1_impeg2_intra_quant_matrix_default, + NUM_PELS_IN_BLOCK); + } + + /*------------------------------------------------------------------------*/ + /* Quantization matrix for the inter blocks */ + /*------------------------------------------------------------------------*/ + if(impeg2d_bit_stream_get_bit(ps_stream) == 1) + { + UWORD16 i; + for(i = 0; i < NUM_PELS_IN_BLOCK; i++) + { + ps_dec->au1_inter_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] = (UWORD8)impeg2d_bit_stream_get(ps_stream,8); + } + } + else + { + memcpy(ps_dec->au1_inter_quant_matrix,gau1_impeg2_inter_quant_matrix_default, + NUM_PELS_IN_BLOCK); + } + impeg2d_next_start_code(ps_dec); + + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} + +/****************************************************************************** +* +* Function Name : impeg2d_dec_seq_ext +* +* Description : Gets additional sequence data. 
+* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_ext(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + + if (impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) != EXTENSION_START_CODE) + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND; + + } + /* Flush the extension start code */ + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + + /* Flush extension start code identifier */ + impeg2d_bit_stream_flush(ps_stream,4); + + /*----------------------------------------------------------------------*/ + /* Profile and Level information */ + /*----------------------------------------------------------------------*/ + { + UWORD32 u4_esc_bit, u4_profile, u4_level; + + /* Read the profile and level information */ + /* check_profile_and_level: Table 8-1 */ + /* [7:7] 1 Escape bit */ + /* [6:4] 3 Profile identification */ + /* [3:0] 4 Level identification */ + + u4_esc_bit = impeg2d_bit_stream_get_bit(ps_stream); + u4_profile = impeg2d_bit_stream_get(ps_stream,3); + u4_level = impeg2d_bit_stream_get(ps_stream,4); + UNUSED(u4_profile); + UNUSED(u4_level); + /* + if( escBit == 1 || + profile < MPEG2_MAIN_PROFILE || + level < MPEG2_MAIN_LEVEL) + */ + if (1 == u4_esc_bit) + { + return IMPEG2D_PROF_LEVEL_NOT_SUPPORTED; + } + } + + ps_dec->u2_progressive_sequence = impeg2d_bit_stream_get_bit(ps_stream); + + /* Read the chrominance format */ + if(impeg2d_bit_stream_get(ps_stream,2) != 0x1) + return IMPEG2D_CHROMA_FMT_NOT_SUP; + + /* Read the 2 most significant bits from horizontal_size */ + ps_dec->u2_horizontal_size += (impeg2d_bit_stream_get(ps_stream,2) << 12); + + /* Read the 2 most significant bits from vertical_size */ + ps_dec->u2_vertical_size += (impeg2d_bit_stream_get(ps_stream,2) << 12); + + 
/*-----------------------------------------------------------------------*/ + /* Flush the following as they are not used now */ + /* bit_rate_extension 12 */ + /* marker_bit 1 */ + /* vbv_buffer_size_extension 8 */ + /* low_delay 1 */ + /*-----------------------------------------------------------------------*/ + impeg2d_bit_stream_flush(ps_stream,12); + GET_MARKER_BIT(ps_dec,ps_stream); + impeg2d_bit_stream_flush(ps_stream,9); + /*-----------------------------------------------------------------------*/ + /* frame_rate_extension_n 2 */ + /* frame_rate_extension_d 5 */ + /*-----------------------------------------------------------------------*/ + ps_dec->u2_frame_rate_extension_n = impeg2d_bit_stream_get(ps_stream,2); + ps_dec->u2_frame_rate_extension_d = impeg2d_bit_stream_get(ps_stream,5); + + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_seq_disp_ext +* +* Description : This function is eqvt to sequence_display_extension() of +* standard. 
It flushes data present as it is not being used +* +* Arguments : +* dec : Decoder Context +* +* Values Returned : None +******************************************************************************/ +void impeg2d_dec_seq_disp_ext(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + + /* + sequence_display_extension() + { + extension_start_code_identifier 4 + video_format 3 + colour_description 1 + if (colour_description) + { + colour_primaries 8 + transfer_characteristics 8 + matrix_coefficients 8 + } + display_horizontal_size 14 + marker_bit 1 + display_vertical_size 14 + next_start_code() + } + */ + + impeg2d_bit_stream_get(ps_stream,7); + if (impeg2d_bit_stream_get_bit(ps_stream) == 1) + { + impeg2d_bit_stream_get(ps_stream,24); + } + + /* display_horizontal_size and display_vertical_size */ + ps_dec->u2_display_horizontal_size = impeg2d_bit_stream_get(ps_stream,14);; + GET_MARKER_BIT(ps_dec,ps_stream); + ps_dec->u2_display_vertical_size = impeg2d_bit_stream_get(ps_stream,14); + + impeg2d_next_start_code(ps_dec); +} + + +/******************************************************************************* +* +* Function Name : impeg2d_dec_seq_scale_ext +* +* Description : This function is eqvt to sequence_scalable_extension() of +* standard. +* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_scale_ext(dec_state_t *ps_dec) +{ + UNUSED(ps_dec); + return IMPEG2D_SCALABILITIY_NOT_SUPPORTED; +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_quant_matrix_ext +* +* Description : Gets Intra and NonIntra quantizer matrix from the stream. 
+* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_quant_matrix_ext(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + /* Flush extension_start_code_identifier */ + impeg2d_bit_stream_flush(ps_stream,4); + + /*------------------------------------------------------------------------*/ + /* Quantization matrix for the intra blocks */ + /*------------------------------------------------------------------------*/ + if(impeg2d_bit_stream_get(ps_stream,1) == 1) + { + UWORD16 i; + for(i = 0; i < NUM_PELS_IN_BLOCK; i++) + { + ps_dec->au1_intra_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] = (UWORD8)impeg2d_bit_stream_get(ps_stream,8); + } + + } + + + /*------------------------------------------------------------------------*/ + /* Quantization matrix for the inter blocks */ + /*------------------------------------------------------------------------*/ + if(impeg2d_bit_stream_get(ps_stream,1) == 1) + { + UWORD16 i; + for(i = 0; i < NUM_PELS_IN_BLOCK; i++) + { + ps_dec->au1_inter_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] = (UWORD8)impeg2d_bit_stream_get(ps_stream,8); + } + } + + /* Note : chroma intra quantizer matrix and chroma non + intra quantizer matrix are not needed for 4:2:0 format */ + impeg2d_next_start_code(ps_dec); +} +/******************************************************************************* +* +* Function Name : impeg2d_dec_pic_disp_ext +* +* Description : This function is eqvt to picture_display_extension() of +* standard.The parameters are not used by decoder +* +* Arguments : Pointer to dec_state_t +* +* Values Returned : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_pic_disp_ext(dec_state_t *ps_dec) +{ + WORD16 i2_number_of_frame_centre_offsets ; + stream_t *ps_stream; + + ps_stream = 
&ps_dec->s_bit_stream; + impeg2d_bit_stream_flush(ps_stream,4); + + if (ps_dec->u2_progressive_sequence) + { + i2_number_of_frame_centre_offsets = (ps_dec->u2_repeat_first_field) ? + 2 + ps_dec->u2_top_field_first : 1; + } + else + { + i2_number_of_frame_centre_offsets = + (ps_dec->u2_picture_structure != FRAME_PICTURE) ? + 1 : 2 + ps_dec->u2_repeat_first_field; + } + while(i2_number_of_frame_centre_offsets--) + { + /* frame_centre_horizontal_offset */ + impeg2d_bit_stream_get(ps_stream,16); + GET_MARKER_BIT(ps_dec,ps_stream); + /* frame_centre_vertical_offset */ + impeg2d_bit_stream_get(ps_stream,16); + GET_MARKER_BIT(ps_dec,ps_stream); + } + impeg2d_next_start_code(ps_dec); +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_itu_t_ext +* +* Description : This function is eqvt to ITU-T_extension() of +* standard.The parameters are not used by decoder +* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_itu_t_ext(dec_state_t *ps_dec) +{ + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,EXT_ID_LEN); + impeg2d_next_start_code(ps_dec); +} + +/******************************************************************************* +* Function Name : impeg2d_dec_copyright_ext +* +* Description : This function is eqvt to copyright_extension() of +* standard. 
The parameters are not used by decoder +* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ + + +void impeg2d_dec_copyright_ext(dec_state_t *ps_dec) +{ + UWORD32 u4_bits_to_flush; + + u4_bits_to_flush = COPYRIGHT_EXTENSION_LEN; + + while(u4_bits_to_flush >= 32 ) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,32); + u4_bits_to_flush = u4_bits_to_flush - 32; + } + + if(u4_bits_to_flush > 0) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,u4_bits_to_flush); + } + + + impeg2d_next_start_code(ps_dec); +} +/******************************************************************************* +* Function Name : impeg2d_dec_cam_param_ext +* +* Description : This function is eqvt to camera_parameters_extension() of +* standard. The parameters are not used by decoder +* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ + + +void impeg2d_dec_cam_param_ext(dec_state_t *ps_dec) +{ + + UWORD32 u4_bits_to_flush; + + u4_bits_to_flush = CAMERA_PARAMETER_EXTENSION_LEN; + + while(u4_bits_to_flush >= 32 ) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,32); + u4_bits_to_flush = u4_bits_to_flush - 32; + } + + if(u4_bits_to_flush > 0) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,u4_bits_to_flush); + } + + impeg2d_next_start_code(ps_dec); +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_grp_of_pic_hdr +* +* Description : Gets information at the GOP level. 
+* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ + + +void impeg2d_dec_grp_of_pic_hdr(dec_state_t *ps_dec) +{ + + UWORD32 u4_bits_to_flush; + + u4_bits_to_flush = GROUP_OF_PICTURE_LEN; + + while(u4_bits_to_flush >= 32 ) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,32); + u4_bits_to_flush = u4_bits_to_flush - 32; + } + + if(u4_bits_to_flush > 0) + { + impeg2d_bit_stream_flush(&ps_dec->s_bit_stream,u4_bits_to_flush); + } + +} + + +/******************************************************************************* +* +* Function Name : impeg2d_dec_pic_hdr +* +* Description : Gets the picture header information. +* +* Arguments : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_pic_hdr(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + /* Flush temporal reference */ + impeg2d_bit_stream_get(ps_stream,10); + + /* Picture type */ + ps_dec->e_pic_type = (e_pic_type_t)impeg2d_bit_stream_get(ps_stream,3); + if((ps_dec->e_pic_type < I_PIC) || (ps_dec->e_pic_type > D_PIC)) + { + impeg2d_next_code(ps_dec, PICTURE_START_CODE); + return IMPEG2D_INVALID_PIC_TYPE; + } + + /* Flush vbv_delay */ + impeg2d_bit_stream_get(ps_stream,16); + + if(ps_dec->e_pic_type == P_PIC || ps_dec->e_pic_type == B_PIC) + { + ps_dec->u2_full_pel_forw_vector = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_forw_f_code = impeg2d_bit_stream_get(ps_stream,3); + } + if(ps_dec->e_pic_type == B_PIC) + { + ps_dec->u2_full_pel_back_vector = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_back_f_code = impeg2d_bit_stream_get(ps_stream,3); + } + + if(ps_dec->u2_is_mpeg2 == 0) + { + ps_dec->au2_f_code[0][0] = ps_dec->au2_f_code[0][1] = ps_dec->u2_forw_f_code; + ps_dec->au2_f_code[1][0] = 
ps_dec->au2_f_code[1][1] = ps_dec->u2_back_f_code; + } + + /*-----------------------------------------------------------------------*/ + /* Flush the extra bit value */ + /* */ + /* while(impeg2d_bit_stream_nxt() == '1') */ + /* { */ + /* extra_bit_picture 1 */ + /* extra_information_picture 8 */ + /* } */ + /* extra_bit_picture 1 */ + /*-----------------------------------------------------------------------*/ + while (impeg2d_bit_stream_nxt(ps_stream,1) == 1) + { + impeg2d_bit_stream_get(ps_stream,9); + } + impeg2d_bit_stream_get_bit(ps_stream); + impeg2d_next_start_code(ps_dec); + + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} + + +/******************************************************************************* +* +* Function Name : impeg2d_dec_pic_coding_ext +* +* Description : Reads more picture level parameters +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_pic_coding_ext(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + /* extension code identifier */ + impeg2d_bit_stream_get(ps_stream,4); + + ps_dec->au2_f_code[0][0] = impeg2d_bit_stream_get(ps_stream,4); + ps_dec->au2_f_code[0][1] = impeg2d_bit_stream_get(ps_stream,4); + ps_dec->au2_f_code[1][0] = impeg2d_bit_stream_get(ps_stream,4); + ps_dec->au2_f_code[1][1] = impeg2d_bit_stream_get(ps_stream,4); + ps_dec->u2_intra_dc_precision = impeg2d_bit_stream_get(ps_stream,2); + ps_dec->u2_picture_structure = impeg2d_bit_stream_get(ps_stream,2); + ps_dec->u2_top_field_first = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_frame_pred_frame_dct = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_concealment_motion_vectors = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_q_scale_type = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_intra_vlc_format = 
impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_alternate_scan = impeg2d_bit_stream_get_bit(ps_stream); + ps_dec->u2_repeat_first_field = impeg2d_bit_stream_get_bit(ps_stream); + /* Flush chroma_420_type */ + impeg2d_bit_stream_get_bit(ps_stream); + + ps_dec->u2_progressive_frame = impeg2d_bit_stream_get_bit(ps_stream); + if (impeg2d_bit_stream_get_bit(ps_stream)) + { + /* Flush v_axis, field_sequence, burst_amplitude, sub_carrier_phase */ + impeg2d_bit_stream_flush(ps_stream,20); + } + impeg2d_next_start_code(ps_dec); + + + if(VERTICAL_SCAN == ps_dec->u2_alternate_scan) + { + ps_dec->pu1_inv_scan_matrix = (UWORD8 *)gau1_impeg2_inv_scan_vertical; + } + else + { + ps_dec->pu1_inv_scan_matrix = (UWORD8 *)gau1_impeg2_inv_scan_zig_zag; + } +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_slice +* +* Description : Reads Slice level parameters and calls functions that +* decode individual MBs of slice +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_slice(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + UWORD32 u4_slice_vertical_position; + UWORD32 u4_slice_vertical_position_extension; + IMPEG2D_ERROR_CODES_T e_error; + + ps_stream = &ps_dec->s_bit_stream; + + /*------------------------------------------------------------------------*/ + /* All the profiles supported require restricted slice structure. Hence */ + /* there is no need to store slice_vertical_position. 
Note that max */ + /* height supported does not exceed 2800 and scalablity is not supported */ + /*------------------------------------------------------------------------*/ + + /* Remove the slice start code */ + impeg2d_bit_stream_flush(ps_stream,START_CODE_PREFIX_LEN); + u4_slice_vertical_position = impeg2d_bit_stream_get(ps_stream, 8); + if(u4_slice_vertical_position > 2800) + { + u4_slice_vertical_position_extension = impeg2d_bit_stream_get(ps_stream, 3); + u4_slice_vertical_position += (u4_slice_vertical_position_extension << 7); + } + + if((u4_slice_vertical_position > ps_dec->u2_num_vert_mb) || + (u4_slice_vertical_position == 0)) + { + return IMPEG2D_INVALID_VERT_SIZE; + } + + // change the mb_y to point to slice_vertical_position + u4_slice_vertical_position--; + if (ps_dec->u2_mb_y != u4_slice_vertical_position) + { + ps_dec->u2_mb_y = u4_slice_vertical_position; + ps_dec->u2_mb_x = 0; + } + ps_dec->u2_first_mb = 1; + + /*------------------------------------------------------------------------*/ + /* Quant scale code decoding */ + /*------------------------------------------------------------------------*/ + { + UWORD16 u2_quant_scale_code; + u2_quant_scale_code = impeg2d_bit_stream_get(ps_stream,5); + ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ? 
+ gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1); + } + + if (impeg2d_bit_stream_nxt(ps_stream,1) == 1) + { + impeg2d_bit_stream_flush(ps_stream,9); + /* Flush extra bit information */ + while (impeg2d_bit_stream_nxt(ps_stream,1) == 1) + { + impeg2d_bit_stream_flush(ps_stream,9); + } + } + impeg2d_bit_stream_get_bit(ps_stream); + + /* Reset the DC predictors to reset values given in Table 7.2 at the start*/ + /* of slice data */ + ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; + /*------------------------------------------------------------------------*/ + /* dec->DecMBsinSlice() implements the following psuedo code from standard*/ + /* do */ + /* { */ + /* macroblock() */ + /* } while (impeg2d_bit_stream_nxt() != '000 0000 0000 0000 0000 0000') */ + /*------------------------------------------------------------------------*/ + + e_error = ps_dec->pf_decode_slice(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + /* Check for the MBy index instead of number of MBs left, because the + * number of MBs left in case of multi-thread decode is the number of MBs + * in that row only + */ + if(ps_dec->u2_mb_y < ps_dec->u2_num_vert_mb) + impeg2d_next_start_code(ps_dec); + + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} + +void impeg2d_dec_pic_data_thread(dec_state_t *ps_dec) +{ + WORD32 i4_continue_decode; + + WORD32 i4_cur_row, temp; + UWORD32 u4_bits_read; + WORD32 i4_dequeue_job; + IMPEG2D_ERROR_CODES_T e_error; + + i4_cur_row = ps_dec->u2_mb_y + 1; + + i4_continue_decode = 1; + + i4_dequeue_job = 1; + do + { + if(i4_cur_row > ps_dec->u2_num_vert_mb) + { + i4_continue_decode = 0; + break; + } + + { + if((ps_dec->i4_num_cores> 1) && (i4_dequeue_job)) + { + job_t s_job; + IV_API_CALL_STATUS_T e_ret; + UWORD8 *pu1_buf; 
+ + e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1); + if(e_ret != IV_SUCCESS) + break; + + if(CMD_PROCESS == s_job.i4_cmd) + { + pu1_buf = ps_dec->pu1_inp_bits_buf + s_job.i4_bistream_ofst; + impeg2d_bit_stream_init(&(ps_dec->s_bit_stream), pu1_buf, + (ps_dec->u4_num_inp_bytes - s_job.i4_bistream_ofst) + 8); + i4_cur_row = s_job.i2_start_mb_y; + ps_dec->i4_start_mb_y = s_job.i2_start_mb_y; + ps_dec->i4_end_mb_y = s_job.i2_end_mb_y; + ps_dec->u2_mb_x = 0; + ps_dec->u2_mb_y = ps_dec->i4_start_mb_y; + ps_dec->u2_num_mbs_left = (ps_dec->i4_end_mb_y - ps_dec->i4_start_mb_y) * ps_dec->u2_num_horiz_mb; + + } + else + { + WORD32 start_row; + WORD32 num_rows; + start_row = s_job.i2_start_mb_y << 4; + num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size); + num_rows -= start_row; + impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + start_row, num_rows); + break; + + } + + } + e_error = impeg2d_dec_slice(ps_dec); + + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + impeg2d_next_start_code(ps_dec); + } + } + + /* Detecting next slice start code */ + while(1) + { + // skip (dec->u4_num_cores-1) rows + u4_bits_read = impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,START_CODE_LEN); + temp = u4_bits_read & 0xFF; + i4_continue_decode = (((u4_bits_read >> 8) == 0x01) && (temp) && (temp <= 0xAF)); + + if(i4_continue_decode) + { + /* If the slice is from the same row, then continue decoding without dequeue */ + if((temp - 1) == i4_cur_row) + { + i4_dequeue_job = 0; + break; + } + + if(temp < ps_dec->i4_end_mb_y) + { + i4_cur_row = ps_dec->u2_mb_y; + } + else + { + i4_dequeue_job = 1; + } + break; + + } + else + break; + } + + }while(i4_continue_decode); + if(ps_dec->i4_num_cores > 1) + { + while(1) + { + job_t s_job; + IV_API_CALL_STATUS_T e_ret; + + e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1); + if(e_ret != IV_SUCCESS) + break; + if(CMD_FMTCONV == s_job.i4_cmd) + { + WORD32 
start_row; + WORD32 num_rows; + start_row = s_job.i2_start_mb_y << 4; + num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size); + num_rows -= start_row; + impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + start_row, num_rows); + } + } + } + else + { + if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat))) + impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + 0, ps_dec->u2_vertical_size); + } +} + +static WORD32 impeg2d_init_thread_dec_ctxt(dec_state_t *ps_dec, + dec_state_t *ps_dec_thd, + WORD32 i4_min_mb_y) +{ + UNUSED(i4_min_mb_y); + ps_dec_thd->i4_start_mb_y = 0; + ps_dec_thd->i4_end_mb_y = ps_dec->u2_num_vert_mb; + ps_dec_thd->u2_mb_x = 0; + ps_dec_thd->u2_mb_y = 0; + ps_dec_thd->u2_is_mpeg2 = ps_dec->u2_is_mpeg2; + ps_dec_thd->u2_frame_width = ps_dec->u2_frame_width; + ps_dec_thd->u2_frame_height = ps_dec->u2_frame_height; + ps_dec_thd->u2_picture_width = ps_dec->u2_picture_width; + ps_dec_thd->u2_horizontal_size = ps_dec->u2_horizontal_size; + ps_dec_thd->u2_vertical_size = ps_dec->u2_vertical_size; + ps_dec_thd->u2_create_max_width = ps_dec->u2_create_max_width; + ps_dec_thd->u2_create_max_height = ps_dec->u2_create_max_height; + ps_dec_thd->u2_header_done = ps_dec->u2_header_done; + ps_dec_thd->u2_decode_header = ps_dec->u2_decode_header; + + ps_dec_thd->u2_num_horiz_mb = ps_dec->u2_num_horiz_mb; + ps_dec_thd->u2_num_vert_mb = ps_dec->u2_num_vert_mb; + ps_dec_thd->u2_num_flds_decoded = ps_dec->u2_num_flds_decoded; + + ps_dec_thd->u4_frm_buf_stride = ps_dec->u4_frm_buf_stride; + + ps_dec_thd->u2_field_dct = ps_dec->u2_field_dct; + ps_dec_thd->u2_read_dct_type = ps_dec->u2_read_dct_type; + + ps_dec_thd->u2_read_motion_type = ps_dec->u2_read_motion_type; + ps_dec_thd->u2_motion_type = ps_dec->u2_motion_type; + + ps_dec_thd->pu2_mb_type = ps_dec->pu2_mb_type; + ps_dec_thd->u2_fld_pic = ps_dec->u2_fld_pic; + ps_dec_thd->u2_frm_pic 
= ps_dec->u2_frm_pic; + + ps_dec_thd->u2_fld_parity = ps_dec->u2_fld_parity; + + ps_dec_thd->au2_fcode_data[0] = ps_dec->au2_fcode_data[0]; + ps_dec_thd->au2_fcode_data[1] = ps_dec->au2_fcode_data[1]; + + ps_dec_thd->u1_quant_scale = ps_dec->u1_quant_scale; + + ps_dec_thd->u2_num_mbs_left = ps_dec->u2_num_mbs_left; + ps_dec_thd->u2_first_mb = ps_dec->u2_first_mb; + ps_dec_thd->u2_num_skipped_mbs = ps_dec->u2_num_skipped_mbs; + + memcpy(&ps_dec_thd->s_cur_frm_buf, &ps_dec->s_cur_frm_buf, sizeof(yuv_buf_t)); + memcpy(&ps_dec_thd->as_recent_fld[0][0], &ps_dec->as_recent_fld[0][0], sizeof(yuv_buf_t)); + memcpy(&ps_dec_thd->as_recent_fld[0][1], &ps_dec->as_recent_fld[0][1], sizeof(yuv_buf_t)); + memcpy(&ps_dec_thd->as_recent_fld[1][0], &ps_dec->as_recent_fld[1][0], sizeof(yuv_buf_t)); + memcpy(&ps_dec_thd->as_recent_fld[1][1], &ps_dec->as_recent_fld[1][1], sizeof(yuv_buf_t)); + memcpy(&ps_dec_thd->as_ref_buf, &ps_dec->as_ref_buf, sizeof(yuv_buf_t) * 2 * 2); + + + ps_dec_thd->pf_decode_slice = ps_dec->pf_decode_slice; + + ps_dec_thd->pf_vld_inv_quant = ps_dec->pf_vld_inv_quant; + + memcpy(ps_dec_thd->pf_idct_recon, ps_dec->pf_idct_recon, sizeof(ps_dec->pf_idct_recon)); + + memcpy(ps_dec_thd->pf_mc, ps_dec->pf_mc, sizeof(ps_dec->pf_mc)); + ps_dec_thd->pf_interpolate = ps_dec->pf_interpolate; + ps_dec_thd->pf_copy_mb = ps_dec->pf_copy_mb; + ps_dec_thd->pf_fullx_halfy_8x8 = ps_dec->pf_fullx_halfy_8x8; + ps_dec_thd->pf_halfx_fully_8x8 = ps_dec->pf_halfx_fully_8x8; + ps_dec_thd->pf_halfx_halfy_8x8 = ps_dec->pf_halfx_halfy_8x8; + ps_dec_thd->pf_fullx_fully_8x8 = ps_dec->pf_fullx_fully_8x8; + + ps_dec_thd->pf_memset_8bit_8x8_block = ps_dec->pf_memset_8bit_8x8_block; + ps_dec_thd->pf_memset_16bit_8x8_linear_block = ps_dec->pf_memset_16bit_8x8_linear_block; + ps_dec_thd->pf_copy_yuv420p_buf = ps_dec->pf_copy_yuv420p_buf; + ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv422ile = ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile; + ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv420sp_uv = 
ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv; + ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv420sp_vu = ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu; + + + memcpy(ps_dec_thd->au1_intra_quant_matrix, ps_dec->au1_intra_quant_matrix, NUM_PELS_IN_BLOCK * sizeof(UWORD8)); + memcpy(ps_dec_thd->au1_inter_quant_matrix, ps_dec->au1_inter_quant_matrix, NUM_PELS_IN_BLOCK * sizeof(UWORD8)); + ps_dec_thd->pu1_inv_scan_matrix = ps_dec->pu1_inv_scan_matrix; + + + ps_dec_thd->u2_progressive_sequence = ps_dec->u2_progressive_sequence; + ps_dec_thd->e_pic_type = ps_dec->e_pic_type; + ps_dec_thd->u2_full_pel_forw_vector = ps_dec->u2_full_pel_forw_vector; + ps_dec_thd->u2_forw_f_code = ps_dec->u2_forw_f_code; + ps_dec_thd->u2_full_pel_back_vector = ps_dec->u2_full_pel_back_vector; + ps_dec_thd->u2_back_f_code = ps_dec->u2_back_f_code; + + memcpy(ps_dec_thd->ai2_mv, ps_dec->ai2_mv, (2*2*2)*sizeof(WORD16)); + memcpy(ps_dec_thd->au2_f_code, ps_dec->au2_f_code, (2*2)*sizeof(UWORD16)); + ps_dec_thd->u2_intra_dc_precision = ps_dec->u2_intra_dc_precision; + ps_dec_thd->u2_picture_structure = ps_dec->u2_picture_structure; + ps_dec_thd->u2_top_field_first = ps_dec->u2_top_field_first; + ps_dec_thd->u2_frame_pred_frame_dct = ps_dec->u2_frame_pred_frame_dct; + ps_dec_thd->u2_concealment_motion_vectors = ps_dec->u2_concealment_motion_vectors; + ps_dec_thd->u2_q_scale_type = ps_dec->u2_q_scale_type; + ps_dec_thd->u2_intra_vlc_format = ps_dec->u2_intra_vlc_format; + ps_dec_thd->u2_alternate_scan = ps_dec->u2_alternate_scan; + ps_dec_thd->u2_repeat_first_field = ps_dec->u2_repeat_first_field; + ps_dec_thd->u2_progressive_frame = ps_dec->u2_progressive_frame; + ps_dec_thd->pu1_inp_bits_buf = ps_dec->pu1_inp_bits_buf; + ps_dec_thd->u4_num_inp_bytes = ps_dec->u4_num_inp_bytes; + ps_dec_thd->pv_jobq = ps_dec->pv_jobq; + ps_dec_thd->pv_jobq_buf = ps_dec->pv_jobq_buf; + ps_dec_thd->i4_jobq_buf_size = ps_dec->i4_jobq_buf_size; + + + ps_dec_thd->u2_frame_rate_code = ps_dec->u2_frame_rate_code; + 
ps_dec_thd->u2_frame_rate_extension_n = ps_dec->u2_frame_rate_extension_n; + ps_dec_thd->u2_frame_rate_extension_d = ps_dec->u2_frame_rate_extension_d; + ps_dec_thd->u2_framePeriod = ps_dec->u2_framePeriod; + ps_dec_thd->u2_display_horizontal_size = ps_dec->u2_display_horizontal_size; + ps_dec_thd->u2_display_vertical_size = ps_dec->u2_display_vertical_size; + ps_dec_thd->u2_aspect_ratio_info = ps_dec->u2_aspect_ratio_info; + + ps_dec_thd->ps_func_bi_direct = ps_dec->ps_func_bi_direct; + ps_dec_thd->ps_func_forw_or_back = ps_dec->ps_func_forw_or_back; + + return 0; + +} + + +WORD32 impeg2d_get_slice_pos(dec_state_multi_core_t *ps_dec_state_multi_core) +{ + WORD32 u4_bits; + WORD32 i4_row; + + + dec_state_t *ps_dec = ps_dec_state_multi_core->ps_dec_state[0]; + WORD32 i4_prev_row; + stream_t s_bitstrm; + WORD32 i4_start_row; + WORD32 i4_slice_bistream_ofst; + WORD32 i; + s_bitstrm = ps_dec->s_bit_stream; + i4_prev_row = -1; + + ps_dec_state_multi_core->ps_dec_state[0]->i4_start_mb_y = 0; + ps_dec_state_multi_core->ps_dec_state[1]->i4_start_mb_y = -1; + ps_dec_state_multi_core->ps_dec_state[2]->i4_start_mb_y = -1; + ps_dec_state_multi_core->ps_dec_state[3]->i4_start_mb_y = -1; + + ps_dec_state_multi_core->ps_dec_state[0]->i4_end_mb_y = ps_dec->u2_num_vert_mb; + ps_dec_state_multi_core->ps_dec_state[1]->i4_end_mb_y = -1; + ps_dec_state_multi_core->ps_dec_state[2]->i4_end_mb_y = -1; + ps_dec_state_multi_core->ps_dec_state[3]->i4_end_mb_y = -1; + + if(ps_dec->i4_num_cores == 1) + return 0; + /* Reset the jobq to start of the jobq buffer */ + impeg2_jobq_reset((jobq_t *)ps_dec->pv_jobq); + + i4_start_row = -1; + i4_slice_bistream_ofst = 0; + while(1) + { + WORD32 i4_is_slice; + u4_bits = impeg2d_bit_stream_nxt(&s_bitstrm,START_CODE_LEN); + if(s_bitstrm.u4_offset >= s_bitstrm.u4_max_offset) + { + break; + } + + + i4_row = u4_bits & 0xFF; + + /* Detect end of frame */ + i4_is_slice = (((u4_bits >> 8) == 0x01) && (i4_row) && (i4_row <= ps_dec->u2_num_vert_mb)); + 
if(!i4_is_slice) + break; + + i4_row -= 1; + + + if(i4_prev_row != i4_row) + { + /* Create a job for previous slice row */ + if(i4_start_row != -1) + { + job_t s_job; + IV_API_CALL_STATUS_T ret; + s_job.i2_start_mb_y = i4_start_row; + s_job.i2_end_mb_y = i4_row; + s_job.i4_cmd = CMD_PROCESS; + s_job.i4_bistream_ofst = i4_slice_bistream_ofst; + ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0); + if(ret != IV_SUCCESS) + return ret; + + } + /* Store current slice's bitstream offset */ + i4_slice_bistream_ofst = s_bitstrm.u4_offset >> 3; + i4_slice_bistream_ofst -= (size_t)s_bitstrm.pv_bs_buf & 3; + i4_prev_row = i4_row; + + /* Store current slice's row position */ + i4_start_row = i4_row; + + } + + + impeg2d_bit_stream_flush(&s_bitstrm, START_CODE_LEN); + + // flush bytes till next start code + /* Flush the bytes till a start code is encountered */ + while(impeg2d_bit_stream_nxt(&s_bitstrm, 24) != START_CODE_PREFIX) + { + impeg2d_bit_stream_get(&s_bitstrm, 8); + + if(s_bitstrm.u4_offset >= s_bitstrm.u4_max_offset) + { + break; + } + } + } + + /* Create job for the last slice row */ + { + job_t s_job; + IV_API_CALL_STATUS_T e_ret; + s_job.i2_start_mb_y = i4_start_row; + s_job.i2_end_mb_y = ps_dec->u2_num_vert_mb; + s_job.i4_cmd = CMD_PROCESS; + s_job.i4_bistream_ofst = i4_slice_bistream_ofst; + e_ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0); + if(e_ret != IV_SUCCESS) + return e_ret; + + } + if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat))) + { + for(i = 0; i < ps_dec->u2_vertical_size; i+=64) + { + job_t s_job; + IV_API_CALL_STATUS_T ret; + s_job.i2_start_mb_y = i; + s_job.i2_start_mb_y >>= 4; + s_job.i2_end_mb_y = (i + 64); + s_job.i2_end_mb_y >>= 4; + s_job.i4_cmd = CMD_FMTCONV; + s_job.i4_bistream_ofst = 0; + ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0); + if(ret != IV_SUCCESS) + return ret; + + } + } + + 
impeg2_jobq_terminate(ps_dec->pv_jobq); + ps_dec->i4_bytes_consumed = s_bitstrm.u4_offset >> 3; + ps_dec->i4_bytes_consumed -= ((size_t)s_bitstrm.pv_bs_buf & 3); + + return 0; +} + +/******************************************************************************* +* +* Function Name : impeg2d_dec_pic_data +* +* Description : It intializes several parameters and decodes a Picture +* till any slice is left. +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ + +void impeg2d_dec_pic_data(dec_state_t *ps_dec) +{ + + WORD32 i; + dec_state_multi_core_t *ps_dec_state_multi_core; + + UWORD32 u4_error_code; + + dec_state_t *ps_dec_thd; + WORD32 i4_status; + WORD32 i4_min_mb_y; + + + /* Resetting the MB address and MB coordinates at the start of the Frame */ + ps_dec->u2_mb_x = ps_dec->u2_mb_y = 0; + u4_error_code = 0; + + ps_dec_state_multi_core = ps_dec->ps_dec_state_multi_core; + impeg2d_get_slice_pos(ps_dec_state_multi_core); + + i4_min_mb_y = 1; + for(i=0; i < ps_dec->i4_num_cores - 1; i++) + { + // initialize decoder context for thread + // launch dec->u4_num_cores-1 threads + + ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1]; + + ps_dec_thd->ps_disp_pic = ps_dec->ps_disp_pic; + ps_dec_thd->ps_disp_frm_buf = ps_dec->ps_disp_frm_buf; + + i4_status = impeg2d_init_thread_dec_ctxt(ps_dec, ps_dec_thd, i4_min_mb_y); + //impeg2d_dec_pic_data_thread(ps_dec_thd); + + if(i4_status == 0) + { + ithread_create(ps_dec_thd->pv_codec_thread_handle, NULL, (void *)impeg2d_dec_pic_data_thread, ps_dec_thd); + ps_dec_state_multi_core->au4_thread_launched[i + 1] = 1; + i4_min_mb_y = ps_dec_thd->u2_mb_y + 1; + } + else + { + ps_dec_state_multi_core->au4_thread_launched[i + 1] = 0; + break; + } + } + + impeg2d_dec_pic_data_thread(ps_dec); + + // wait for threads to complete + for(i=0; i < (ps_dec->i4_num_cores - 1); i++) + { + if(ps_dec_state_multi_core->au4_thread_launched[i + 1] == 
1) + { + ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1]; + ithread_join(ps_dec_thd->pv_codec_thread_handle, NULL); + } + } + + ps_dec->u4_error_code = u4_error_code; + +} +/******************************************************************************* +* +* Function Name : impeg2d_flush_ext_and_user_data +* +* Description : Flushes the extension and user data present in the +* stream_t +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_flush_ext_and_user_data(dec_state_t *ps_dec) +{ + UWORD32 u4_start_code; + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + + while(u4_start_code == EXTENSION_START_CODE || u4_start_code == USER_DATA_START_CODE) + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + while(impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX) + { + impeg2d_bit_stream_flush(ps_stream,8); + } + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + } +} +/******************************************************************************* +* +* Function Name : impeg2d_dec_user_data +* +* Description : Flushes the user data present in the stream_t +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_user_data(dec_state_t *ps_dec) +{ + UWORD32 u4_start_code; + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + + while(u4_start_code == USER_DATA_START_CODE) + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + while(impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX) + { + impeg2d_bit_stream_flush(ps_stream,8); + } + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + } +} 
+/******************************************************************************* +* Function Name : impeg2d_dec_seq_ext_data +* +* Description : Decodes the extension data following Sequence +* Extension. It flushes any user data if present +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_ext_data(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + UWORD32 u4_start_code; + IMPEG2D_ERROR_CODES_T e_error; + + e_error = (IMPEG2D_ERROR_CODES_T) IVD_ERROR_NONE; + + ps_stream = &ps_dec->s_bit_stream; + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + while( (u4_start_code == EXTENSION_START_CODE || + u4_start_code == USER_DATA_START_CODE) && + (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE == e_error) + { + if(u4_start_code == USER_DATA_START_CODE) + { + impeg2d_dec_user_data(ps_dec); + } + else + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,EXT_ID_LEN); + switch(u4_start_code) + { + case SEQ_DISPLAY_EXT_ID: + impeg2d_dec_seq_disp_ext(ps_dec); + break; + case SEQ_SCALABLE_EXT_ID: + e_error = IMPEG2D_SCALABILITIY_NOT_SUPPORTED; + break; + default: + /* In case its a reserved extension code */ + impeg2d_bit_stream_flush(ps_stream,EXT_ID_LEN); + impeg2d_peek_next_start_code(ps_dec); + break; + } + } + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + } + return e_error; +} +/******************************************************************************* +* Function Name : impeg2d_dec_pic_ext_data +* +* Description : Decodes the extension data following Picture Coding +* Extension. 
It flushes any user data if present +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_pic_ext_data(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + UWORD32 u4_start_code; + IMPEG2D_ERROR_CODES_T e_error; + + e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; + + ps_stream = &ps_dec->s_bit_stream; + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + while ( (u4_start_code == EXTENSION_START_CODE || + u4_start_code == USER_DATA_START_CODE) && + (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE == e_error) + { + if(u4_start_code == USER_DATA_START_CODE) + { + impeg2d_dec_user_data(ps_dec); + } + else + { + impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN); + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,EXT_ID_LEN); + switch(u4_start_code) + { + case QUANT_MATRIX_EXT_ID: + impeg2d_dec_quant_matrix_ext(ps_dec); + break; + case COPYRIGHT_EXT_ID: + impeg2d_dec_copyright_ext(ps_dec); + break; + case PIC_DISPLAY_EXT_ID: + impeg2d_dec_pic_disp_ext(ps_dec); + break; + case CAMERA_PARAM_EXT_ID: + impeg2d_dec_cam_param_ext(ps_dec); + break; + case ITU_T_EXT_ID: + impeg2d_dec_itu_t_ext(ps_dec); + break; + case PIC_SPATIAL_SCALABLE_EXT_ID: + case PIC_TEMPORAL_SCALABLE_EXT_ID: + e_error = IMPEG2D_SCALABLITY_NOT_SUP; + break; + default: + /* In case its a reserved extension code */ + impeg2d_bit_stream_flush(ps_stream,EXT_ID_LEN); + impeg2d_next_start_code(ps_dec); + break; + } + } + u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + } + return e_error; +} + +/******************************************************************************* +* +* Function Name : impeg2d_process_video_header +* +* Description : Processes video sequence header information +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ 
+IMPEG2D_ERROR_CODES_T impeg2d_process_video_header(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + ps_stream = &ps_dec->s_bit_stream; + IMPEG2D_ERROR_CODES_T e_error; + + impeg2d_next_code(ps_dec, SEQUENCE_HEADER_CODE); + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + e_error = impeg2d_dec_seq_hdr(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + } + else + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + if (impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == EXTENSION_START_CODE) + { + /* MPEG2 Decoder */ + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + e_error = impeg2d_dec_seq_ext(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + } + else + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + e_error = impeg2d_dec_seq_ext_data(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + } + return impeg2d_init_video_state(ps_dec,MPEG_2_VIDEO); + } + else + { + /* MPEG1 Decoder */ + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + impeg2d_flush_ext_and_user_data(ps_dec); + } + return impeg2d_init_video_state(ps_dec,MPEG_1_VIDEO); + } +} +/******************************************************************************* +* +* Function Name : impeg2d_process_video_bit_stream +* +* Description : Processes video sequence header information +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_process_video_bit_stream(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + UWORD32 u4_next_bits, u4_start_code_found; + IMPEG2D_ERROR_CODES_T e_error; + + ps_stream = &ps_dec->s_bit_stream; + impeg2d_next_start_code(ps_dec); + /* If the stream is MPEG-2 compliant 
stream */ + u4_start_code_found = 0; + + if(ps_dec->u2_is_mpeg2) + { + /* MPEG2 decoding starts */ + while((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)) + { + u4_next_bits = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + + if(u4_next_bits == SEQUENCE_HEADER_CODE) + { + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + e_error = impeg2d_dec_seq_hdr(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + u4_start_code_found = 0; + + } + else + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + + + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + IMPEG2D_ERROR_CODES_T e_error; + e_error = impeg2d_dec_seq_ext(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + u4_start_code_found = 0; + + } + else + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + } + else if((u4_next_bits == USER_DATA_START_CODE) || (u4_next_bits == EXTENSION_START_CODE)) + { + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + impeg2d_dec_seq_ext_data(ps_dec); + u4_start_code_found = 0; + + } + + } + else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + && (u4_next_bits == GOP_START_CODE)) + { + impeg2d_dec_grp_of_pic_hdr(ps_dec); + impeg2d_dec_user_data(ps_dec); + u4_start_code_found = 0; + + } + else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + && (u4_next_bits == PICTURE_START_CODE)) + { + + e_error = impeg2d_dec_pic_hdr(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + impeg2d_dec_pic_coding_ext(ps_dec); + e_error = impeg2d_dec_pic_ext_data(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + impeg2d_pre_pic_dec_proc(ps_dec); + impeg2d_dec_pic_data(ps_dec); + impeg2d_post_pic_dec_proc(ps_dec); + u4_start_code_found = 1; + } + else + 
+ { + FLUSH_BITS(ps_dec->s_bit_stream.u4_offset, ps_dec->s_bit_stream.u4_buf, ps_dec->s_bit_stream.u4_buf_nxt, 8, ps_dec->s_bit_stream.pu4_buf_aligned); + + } + if(u4_start_code_found == 0) + { + impeg2d_next_start_code(ps_dec); + } + } + if((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)) + { + return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND; + } + + } + /* If the stream is MPEG-1 compliant stream */ + else + { + while((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)) + { + u4_next_bits = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN); + + if(impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == SEQUENCE_HEADER_CODE) + { + if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) + { + e_error = impeg2d_dec_seq_hdr(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + u4_start_code_found = 0; + } + else + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + } + else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) && (u4_next_bits == EXTENSION_START_CODE || u4_next_bits == USER_DATA_START_CODE)) + { + impeg2d_flush_ext_and_user_data(ps_dec); + u4_start_code_found = 0; + } + + + else if ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == GOP_START_CODE) + && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)) + { + impeg2d_dec_grp_of_pic_hdr(ps_dec); + impeg2d_flush_ext_and_user_data(ps_dec); + u4_start_code_found = 0; + } + else if ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == PICTURE_START_CODE) + && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)) + { + + e_error = impeg2d_dec_pic_hdr(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + impeg2d_flush_ext_and_user_data(ps_dec); + impeg2d_pre_pic_dec_proc(ps_dec); + impeg2d_dec_pic_data(ps_dec); + impeg2d_post_pic_dec_proc(ps_dec); + 
u4_start_code_found = 1; + } + else + { + FLUSH_BITS(ps_dec->s_bit_stream.u4_offset, ps_dec->s_bit_stream.u4_buf, ps_dec->s_bit_stream.u4_buf_nxt, 8, ps_dec->s_bit_stream.pu4_buf_aligned); + } + impeg2d_next_start_code(ps_dec); + + } + if((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)) + { + return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND; + } + } + + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} diff --git a/decoder/impeg2d_dec_hdr.h b/decoder/impeg2d_dec_hdr.h new file mode 100644 index 0000000..8bd0378 --- /dev/null +++ b/decoder/impeg2d_dec_hdr.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : impeg2d_dec_hdr.h */ +/* */ +/* Description : This file contains all the necessary examples to */ +/* establish a consistent use of Ittiam C coding */ +/* standards (based on Indian Hill C Standards) */ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 10 10 2005 Ittiam Draft */ +/* */ +/*****************************************************************************/ + +#ifndef __IMPEG2D_DEC_HDR_H__ +#define __IMPEG2D_DEC_HDR_H__ + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_process_video_header(dec_state_t *dec); + +IMPEG2D_ERROR_CODES_T impeg2d_process_video_bit_stream(dec_state_t *dec); + + +#endif /* __IMPEG2D_DEC_HDR_H__ */ + diff --git a/decoder/impeg2d_decoder.c b/decoder/impeg2d_decoder.c new file mode 100755 index 0000000..ae58675 --- /dev/null +++ b/decoder/impeg2d_decoder.c @@ -0,0 +1,292 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : mpeg2dec_api_utils.c */ +/* */ +/* */ +/* Description : This file defines the API interface for MPEG2 Decoder*/ +/* */ +/* List of Functions : <List the functions defined in this file> */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 09 2007 Rajendra C Y Creation */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ + +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_job_queue.h" +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_api.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" +#include "impeg2d_mc.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_dec_hdr.h" + +void impeg2d_next_start_code(dec_state_t *ps_dec); +void impeg2d_next_code(dec_state_t *ps_dec, UWORD32 u4_start_code_val); + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_dec_hdr */ +/* */ +/* Description : */ +/* Inputs : */ +/* Globals : */ +/* Processing : */ +/* Outputs 
: */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 09 2007 Rajendra C Y Draft */ +/* */ +/*****************************************************************************/ +void impeg2d_dec_hdr(void *pv_dec,impeg2d_video_decode_ip_t *ps_ip, + impeg2d_video_decode_op_t *ps_op) +{ + + UWORD32 u4_bits_read; + dec_state_t *ps_dec; + + ps_dec = (dec_state_t *)pv_dec; + ps_op->s_ivd_video_decode_op_t.u4_error_code = 0; + + impeg2d_bit_stream_init(&(ps_dec->s_bit_stream),ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer, + ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes); + + { + { + IMPEG2D_ERROR_CODES_T e_error; + e_error = impeg2d_process_video_header(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code = e_error; + + u4_bits_read = impeg2d_bit_stream_num_bits_read(&ps_dec->s_bit_stream); + + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = u4_bits_read>> 3; + if(ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed > ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes) + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + } + if(ps_op->s_ivd_video_decode_op_t.u4_error_code == 0) + ps_op->s_ivd_video_decode_op_t.u4_error_code = e_error; + + + impeg2d_next_code(ps_dec, SEQUENCE_HEADER_CODE); + return; + } + } + ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_vertical_size; + ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_horizontal_size; + + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_NA_FRAME; + ps_op->s_ivd_video_decode_op_t.u4_error_code = IV_SUCCESS; + + u4_bits_read = impeg2d_bit_stream_num_bits_read(&ps_dec->s_bit_stream); + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = u4_bits_read>> 3; + if(ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed > ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes) + { + 
ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + } + ps_op->s_ivd_video_decode_op_t.u4_frame_decoded_flag = 0; + /* MOD */ + ps_dec->u2_header_done = 1; + ps_dec->u2_decode_header = 0; + + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_dec_frm */ +/* */ +/* Description : */ +/* Inputs : */ +/* Globals : */ +/* Processing : */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 17 09 2007 Rajendra C Y Draft */ +/* */ +/*****************************************************************************/ +void impeg2d_dec_frm(void *pv_dec,impeg2d_video_decode_ip_t *ps_ip, + impeg2d_video_decode_op_t *ps_op) +{ + + + stream_t *ps_stream; + UWORD32 u4_size = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + + dec_state_t *ps_dec; + + ps_dec = (dec_state_t *)pv_dec; + ps_op->s_ivd_video_decode_op_t.u4_error_code = 0; + + IMPEG2D_FRM_NUM_SET(); + + ps_dec->pu1_inp_bits_buf = ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer; + ps_dec->u4_num_inp_bytes = u4_size; + ps_stream = &ps_dec->s_bit_stream; + + + impeg2d_bit_stream_init(ps_stream,ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer,u4_size); + + /* @ */ /* Updating the bufferID */ + + ps_dec->u4_xdmBufID = ps_ip->s_ivd_video_decode_ip_t.u4_ts; + + { + IMPEG2D_ERROR_CODES_T e_error; + /* Process the Bitstream */ + e_error = impeg2d_process_video_bit_stream(ps_dec); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code = e_error; + + if ((IMPEG2D_ERROR_CODES_T) IVD_RES_CHANGED == e_error) + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0; + ps_dec->u2_header_done = 0; + } + else if (IMPEG2D_UNSUPPORTED_DIMENSIONS == e_error) + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0; + ps_dec->u2_header_done = 0; + + 
ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_reinit_max_height; + ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_reinit_max_width; + } + else + { + if(ps_dec->i4_num_cores > 1) + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_dec->i4_bytes_consumed; + else + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = (ps_dec->s_bit_stream.u4_offset + 7) >> 3; + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed -= ((size_t)ps_dec->s_bit_stream.pv_bs_buf & 3); + } + + if(ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed + > ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes) + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = + ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + } + + impeg2d_next_start_code(ps_dec); + } + + if(ps_op->s_ivd_video_decode_op_t.u4_error_code == 0) + { + ps_op->s_ivd_video_decode_op_t.u4_error_code = e_error; + } + + return; + } + } + /**************************************************************************/ + /* Remove the bytes left till next start code is encountered */ + /**************************************************************************/ + ps_op->s_ivd_video_decode_op_t.u4_error_code = IV_SUCCESS; + + if(ps_dec->i4_num_cores > 1) + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_dec->i4_bytes_consumed; + else + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = (ps_dec->s_bit_stream.u4_offset + 7) >> 3; + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed -= ((size_t)ps_dec->s_bit_stream.pv_bs_buf & 3); + } + if(ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed > ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes) + { + ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes; + } + ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_vertical_size; + ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_horizontal_size; + + switch(ps_dec->e_pic_type) + { + case I_PIC : + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME; + break; 
+ + case P_PIC: + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_P_FRAME; + break; + + case B_PIC: + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_B_FRAME; + break; + + case D_PIC: + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME; + break; + + default : + ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_FRAMETYPE_DEFAULT; + break; + } + + ps_op->s_ivd_video_decode_op_t.u4_frame_decoded_flag = ps_dec->i4_frame_decoded; + ps_op->s_ivd_video_decode_op_t.u4_new_seq = 0; + ps_op->s_ivd_video_decode_op_t.u4_error_code = ps_dec->u4_error_code; + + +} diff --git a/decoder/impeg2d_function_selector_generic.c b/decoder/impeg2d_function_selector_generic.c new file mode 100644 index 0000000..b8cdf03 --- /dev/null +++ b/decoder/impeg2d_function_selector_generic.c @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector_generic.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: impeg2d_init_function_ptr_generic() +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" +#include "ithread.h" + + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_job_queue.h" +#include "impeg2_globals.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_api.h" +#include "impeg2d_debug.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_mc.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +/* Fills the pf_ function pointers of the decoder state passed as pv_codec (cast to dec_state_t *): IDCT/recon, motion compensation, interpolation, MB copy, 8x8 MC, memset and output copy/format conversion. */ +void impeg2d_init_function_ptr_generic(void *pv_codec) +{ + dec_state_t *ps_dec = (dec_state_t *)pv_codec; + + ps_dec->pf_idct_recon[0] = &impeg2_idct_recon_dc; + ps_dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch; + ps_dec->pf_idct_recon[2] = &impeg2_idct_recon; + ps_dec->pf_idct_recon[3] = &impeg2_idct_recon; /* pf_idct_recon[] is indexed as i4_idx * 2 + i4_last_value_one by impeg2d_dec_i_slice() */ + + ps_dec->pf_mc[0] = &impeg2d_mc_fullx_fully; + ps_dec->pf_mc[1] = &impeg2d_mc_fullx_halfy; + ps_dec->pf_mc[2] = &impeg2d_mc_halfx_fully; + ps_dec->pf_mc[3] = &impeg2d_mc_halfx_halfy; /* pf_mc[] is indexed by the u4_mode field of the luma/chroma MC params */ + + ps_dec->pf_interpolate = &impeg2_interpolate; + ps_dec->pf_copy_mb = &impeg2_copy_mb; + + ps_dec->pf_fullx_halfy_8x8 = 
&impeg2_mc_fullx_halfy_8x8; + ps_dec->pf_halfx_fully_8x8 = &impeg2_mc_halfx_fully_8x8; + ps_dec->pf_halfx_halfy_8x8 = &impeg2_mc_halfx_halfy_8x8; + ps_dec->pf_fullx_fully_8x8 = &impeg2_mc_fullx_fully_8x8; + + ps_dec->pf_memset_8bit_8x8_block = &impeg2_memset_8bit_8x8_block; + ps_dec->pf_memset_16bit_8x8_linear_block = &impeg2_memset0_16bit_8x8_linear_block; + + ps_dec->pf_copy_yuv420p_buf = &impeg2_copy_frm_yuv420p; + ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile = &impeg2_fmt_conv_yuv420p_to_yuv422ile; + ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = &impeg2_fmt_conv_yuv420p_to_yuv420sp_uv; + ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = &impeg2_fmt_conv_yuv420p_to_yuv420sp_vu; +} diff --git a/decoder/impeg2d_globals.c b/decoder/impeg2d_globals.c new file mode 100644 index 0000000..8c71ecf --- /dev/null +++ b/decoder/impeg2d_globals.c @@ -0,0 +1,158 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#include <stdio.h> +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_globals.h" +#include "impeg2d_mc.h" + +/*****************************************************************************/ +/* MC params table */ +/*****************************************************************************/ +const mc_type_consts_t gas_impeg2d_mc_params_luma[][2] = +{ + /* frame prediction in P frame picture */ + {{1,0,1,1,MB_SIZE,MB_SIZE,0}, + {1,0,1,1,MB_SIZE,MB_SIZE,0}}, + /* field prediction in P frame picture */ + {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0}, + {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}}, + /* frame prediction in B frame picture */ + {{1,0,1,1,MB_SIZE,MB_SIZE,0}, + {1,0,1,1,MB_SIZE,MB_SIZE,0}}, + /* field prediction in B frame picture */ + {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0}, + {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}}, + /* dual prime prediction in P frame picture */ + {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0}, + {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}}, + + /* field prediction in P field picture */ + {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}}, + /* 16x8 prediction in P field picture */ + {{1,0,2,2,MB_SIZE/2,MB_SIZE,0},{1,8,2,2,MB_SIZE/2,MB_SIZE,(1*MB_SIZE/2)}}, + /* field prediction in B field picture */ + {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}}, + /* 16x8 prediction in B field picture */ + {{1,0,2,2,MB_SIZE/2,MB_SIZE,0},{1,8,2,2,MB_SIZE/2,MB_SIZE,(1*MB_SIZE/2)}}, + /* dual prime prediction in P field picture */ + {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}} + +}; + +const mc_type_consts_t gas_impeg2d_mc_params_chroma[10][2] = +{ + /* frame 
prediction in P frame picture */ + {{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0}}, + /* field prediction in P frame picture */ + {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2, + MB_CHROMA_SIZE*2,1}}, + /* frame prediction in B frame picture */ + {{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,1,1,MB_CHROMA_SIZE, + MB_CHROMA_SIZE,0}}, + /* field prediction in B frame picture */ + {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2, + MB_CHROMA_SIZE*2,1}}, + /* dual prime prediction in P frame picture */ + {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2, + MB_CHROMA_SIZE*2,1}}, + + /* field prediction in P field picture */ + {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE, + MB_CHROMA_SIZE,0}}, + /* 16x8 prediction in P field picture */ + {{1,0,2,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE,0},{1,4,2,2,MB_CHROMA_SIZE/2, + MB_CHROMA_SIZE,(1*MB_CHROMA_SIZE/2)}}, + /* field prediction in B field picture */ + {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE, + MB_CHROMA_SIZE,0}}, + /* 16x8 prediction in B field picture */ + {{1,0,2,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE,0},{1,4,2,2,MB_CHROMA_SIZE/2, + MB_CHROMA_SIZE,(1*MB_CHROMA_SIZE/2)}}, + /* dual prime prediction in P field picture */ + {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE, + MB_CHROMA_SIZE,0}} + +}; + +/*****************************************************************************/ +/* MC function pointer table */ +/*****************************************************************************/ +const dec_mb_params_t gas_impeg2d_func_frm_fw_or_bk[4] = +{ + /*0MV*/ + {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv}, + /* motion_type Field based */ + {impeg2d_dec_2mv_fw_or_bk_mb,MC_FRM_FW_OR_BK_2MV,impeg2d_mc_fw_or_bk_mb}, + /* motion_type Frame based */ + {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv}, + /* motion_type Dual prime based */ + 
{impeg2d_dec_frm_dual_prime,MC_FRM_FW_DUAL_PRIME_1MV,impeg2d_mc_frm_dual_prime}, +}; + +const dec_mb_params_t gas_impeg2d_func_fld_fw_or_bk[4] = +{ + /*0MV*/ + {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv}, + /* motion_type Field based */ + {impeg2d_dec_1mv_mb,MC_FLD_FW_OR_BK_1MV,impeg2d_mc_1mv}, + /* motion_type 16x8 MC */ + {impeg2d_dec_2mv_fw_or_bk_mb,MC_FLD_FW_OR_BK_2MV,impeg2d_mc_fw_or_bk_mb}, + /* motion_type Dual prime based */ + {impeg2d_dec_fld_dual_prime,MC_FLD_FW_DUAL_PRIME_1MV,impeg2d_mc_fld_dual_prime}, +}; + + +const dec_mb_params_t gas_impeg2d_func_frm_bi_direct[4] = +{ + {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, + /* motion_type Field based */ + {impeg2d_dec_4mv_mb,MC_FRM_FW_AND_BK_4MV,impeg2d_mc_4mv}, + /* motion_type Frame based */ + {impeg2d_dec_2mv_interp_mb,MC_FRM_FW_AND_BK_2MV,impeg2d_mc_2mv}, + /* Reserved not applicable */ + {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, +}; + +const dec_mb_params_t gas_impeg2d_func_fld_bi_direct[4] = +{ + {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, + /* motion_type Field based */ + {impeg2d_dec_2mv_interp_mb,MC_FLD_FW_AND_BK_2MV,impeg2d_mc_2mv}, + /* motion_type 16x8 MC */ + {impeg2d_dec_4mv_mb,MC_FLD_FW_AND_BK_4MV,impeg2d_mc_4mv}, + /* Reserved not applicable */ + {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, +}; diff --git a/decoder/impeg2d_globals.h b/decoder/impeg2d_globals.h new file mode 100644 index 0000000..5b9c093 --- /dev/null +++ b/decoder/impeg2d_globals.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2D_GLOBALS_H__ +#define __IMPEG2D_GLOBALS_H__ + +typedef struct +{ + UWORD16 mvy_cf; + UWORD16 mv_num_cf; + UWORD16 frm_wd_cf; + UWORD16 src_wd_cf; + UWORD32 rows; + UWORD32 dst_wd; + UWORD32 dst_offset_scale; +}mc_type_consts_t; + +extern const mc_type_consts_t gas_impeg2d_mc_params_luma[][2]; +extern const mc_type_consts_t gas_impeg2d_mc_params_chroma[][2]; + +extern const dec_mb_params_t gas_impeg2d_func_frm_fw_or_bk[]; +extern const dec_mb_params_t gas_impeg2d_func_fld_fw_or_bk[]; + +extern const dec_mb_params_t gas_impeg2d_func_frm_bi_direct[]; +extern const dec_mb_params_t gas_impeg2d_func_fld_bi_direct[]; + +#endif /* __IMPEG2D_GLOBALS_H__ */ diff --git a/decoder/impeg2d_i_pic.c b/decoder/impeg2d_i_pic.c new file mode 100644 index 0000000..1b45350 --- /dev/null +++ b/decoder/impeg2d_i_pic.c @@ -0,0 +1,328 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_globals.h" +#include "impeg2d_mv_dec.h" + +/******************************************************************************* +* Function Name : impeg2d_dec_i_mb_params +* +* Description : Decoding I MB parameters. 
+* +* Arguments : +* dec : Decoder state +* stream : Bitstream +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_i_mb_params(dec_state_t *ps_dec) /* ps_dec is the only argument; the bitstream is ps_dec->s_bit_stream */ +{ + + UWORD16 u2_next_bits; + UWORD16 u2_bits_to_flush; + stream_t *ps_stream = &ps_dec->s_bit_stream; + + /*-----------------------------------------------------------------------*/ + /* Flush the MBAddrIncr Bit */ + /* */ + /* Since we are not supporting scalable modes there won't be skipped */ + /* macroblocks in I-Picture and the MBAddrIncr will always be 1, */ + /* The MBAddrIncr can never be greater than 1 for the simple and main */ + /* profile MPEG2. */ + /*-----------------------------------------------------------------------*/ + if(impeg2d_bit_stream_nxt(ps_stream,1) == 1) //Making sure the increment is one. + { + impeg2d_bit_stream_flush(ps_stream,1); + } + else if(ps_dec->u2_first_mb && ps_dec->u2_mb_x) + { + WORD32 i4_mb_add_inc = impeg2d_get_mb_addr_incr(ps_stream); + + //VOLParams->FirstInSlice = 0; + /****************************************************************/ + /* Section 6.3.17 */ + /* The first MB of a slice cannot be skipped */ + /* But the mb_addr_incr can be > 1, because at the beginning of */ + /* a slice, it indicates the offset from the last MB in the */ + /* previous row. Hence for the first slice in a row, the */ + /* mb_addr_incr needs to be 1. */ + /****************************************************************/ + /* MB_x is set to zero whenever MB_y changes. 
*/ + + ps_dec->u2_mb_x = i4_mb_add_inc - 1; + ps_dec->u2_mb_x = MIN(ps_dec->u2_mb_x, (ps_dec->u2_num_horiz_mb - 1)); + } + + /*-----------------------------------------------------------------------*/ + /* Decode the macroblock_type, dct_type and quantiser_scale_code */ + /* */ + /* macroblock_type 2 bits [can be either 1 or 01] */ + /* dct_type 1 bit */ + /* quantiser_scale_code 5 bits */ + /*-----------------------------------------------------------------------*/ + u2_next_bits = impeg2d_bit_stream_nxt(ps_stream,8); + if(BIT(u2_next_bits,7) == 1) + { + /* read the dct_type if needed */ + u2_bits_to_flush = 1; /* macroblock_type '1': a single bit, no quantiser_scale_code is read */ + if(ps_dec->u2_read_dct_type) + { + u2_bits_to_flush++; + ps_dec->u2_field_dct = BIT(u2_next_bits,6); + } + } + else + { + u2_bits_to_flush = 7; /* macroblock_type '01' (2 bits) plus 5-bit quantiser_scale_code */ + /*------------------------------------------------------------------*/ + /* read the dct_type if needed */ + /*------------------------------------------------------------------*/ + if(ps_dec->u2_read_dct_type) + { + u2_bits_to_flush++; + ps_dec->u2_field_dct = BIT(u2_next_bits,5); + } + else + { + u2_next_bits >>= 1; /* no dct_type bit was read: move quantiser_scale_code down to bits 4..0 */ + } + /*------------------------------------------------------------------*/ + /* Quant scale code decoding */ + /*------------------------------------------------------------------*/ + { + UWORD16 quant_scale_code; + quant_scale_code = u2_next_bits & 0x1F; + + ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ? 
+ gau1_impeg2_non_linear_quant_scale[quant_scale_code] : + (quant_scale_code << 1); + } + } + impeg2d_bit_stream_flush(ps_stream,u2_bits_to_flush); + /*************************************************************************/ + /* Decoding of motion vectors if concealment motion vectors are present */ + /*************************************************************************/ + if(ps_dec->u2_concealment_motion_vectors) + { + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + impeg2d_bit_stream_flush(ps_stream,1); + impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[FORW][FIRST],ps_dec->ai2_mv[FORW][FIRST], + ps_dec->au2_f_code[FORW],0,0); + + /* Flush the marker bit */ + if(0 == (impeg2d_bit_stream_get(ps_stream,1))) + { + /* Ignore marker bit error */ + } + + } + ps_dec->u2_first_mb = 0; + return; +} +/******************************************************************************* +* Function Name : impeg2d_dec_i_slice +* +* Description : Decodes the MBs of an I slice: NUM_LUMA_BLKS luma blocks, then one U and one V block per MB +* +* Arguments : +* ps_dec : Decoder state +* +* Values Returned : IVD_ERROR_NONE on success; otherwise the error from pf_vld_inv_quant or IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_dec_i_slice(dec_state_t *ps_dec) +{ + WORD16 *pi2_vld_out; + UWORD32 i; + yuv_buf_t *ps_cur_frm_buf = &ps_dec->s_cur_frm_buf; + + UWORD32 u4_frame_width = ps_dec->u2_frame_width; + UWORD32 u4_frm_offset = 0; + UWORD8 *pu1_out_p; + IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; + + + pi2_vld_out = ps_dec->ai2_vld_buf; + + + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + { + u4_frame_width <<= 1; /* field picture: row stride is twice the frame width */ + if(ps_dec->u2_picture_structure == BOTTOM_FIELD) + { + u4_frm_offset = ps_dec->u2_frame_width; /* bottom field: start one frame line further in */ + } + } + + do + { + UWORD32 u4_x_offset,u4_y_offset; + UWORD32 u4_blk_pos; + UWORD32 u4_x_dst_offset = 0; + UWORD32 u4_y_dst_offset = 0; + + + IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y); + + impeg2d_dec_i_mb_params(ps_dec); + + u4_x_dst_offset = u4_frm_offset + (ps_dec->u2_mb_x << 4); + u4_y_dst_offset = 
(ps_dec->u2_mb_y << 4) * u4_frame_width; + pu1_out_p = ps_cur_frm_buf->pu1_y + u4_x_dst_offset + u4_y_dst_offset; + + for(i = 0; i < NUM_LUMA_BLKS; ++i) + { + + e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, + ps_dec->pu1_inv_scan_matrix, 1, Y_LUMA, 0); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + u4_x_offset = gai2_impeg2_blk_x_off[i]; + + if(ps_dec->u2_field_dct == 0) + u4_y_offset = gai2_impeg2_blk_y_off_frm[i] ; + else + u4_y_offset = gai2_impeg2_blk_y_off_fld[i] ; + + u4_blk_pos = u4_y_offset * u4_frame_width + u4_x_offset; + IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows); + + PROFILE_DISABLE_IDCT_IF0 + { + WORD32 i4_idx; + i4_idx = 1; + if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows)) + i4_idx = 0; /* only bit 0 of the column/row masks is set: selects pf_idct_recon[0] or [1] instead of [2] or [3] */ + + ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out, + ps_dec->ai2_idct_stg1, + (UWORD8 *)gau1_impeg2_zerobuf, + pu1_out_p + u4_blk_pos, + 8, + 8, + u4_frame_width << ps_dec->u2_field_dct, /* field DCT doubles the luma output stride */ + ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows); + + } + + } + + /* For U and V blocks, halve the x offset; the y offset (rows * stride) is divided by 4 because chroma has half the rows and half the stride. 
*/ + u4_x_dst_offset >>= 1; + u4_y_dst_offset >>= 2; + + /* In case of chrominance blocks the DCT will be frame DCT */ + /* U block: decode, then reconstruct into pu1_u */ + + e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, + ps_dec->pu1_inv_scan_matrix, 1, U_CHROMA, 0); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + pu1_out_p = ps_cur_frm_buf->pu1_u + u4_x_dst_offset + u4_y_dst_offset; + IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows); + PROFILE_DISABLE_IDCT_IF0 + { + WORD32 i4_idx; + i4_idx = 1; + if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows)) + i4_idx = 0; + + ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out, + ps_dec->ai2_idct_stg1, + (UWORD8 *)gau1_impeg2_zerobuf, + pu1_out_p, + 8, + 8, + u4_frame_width >> 1, + ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows); + + } + /* V block: decode, then reconstruct into pu1_v */ + /* As for U, the output stride is u4_frame_width >> 1 regardless of u2_field_dct */ + e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, + ps_dec->pu1_inv_scan_matrix, 1, V_CHROMA, 0); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + pu1_out_p = ps_cur_frm_buf->pu1_v + u4_x_dst_offset + u4_y_dst_offset; + IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows); + PROFILE_DISABLE_IDCT_IF0 + { + WORD32 i4_idx; + i4_idx = 1; + if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows)) + i4_idx = 0; + ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out, + ps_dec->ai2_idct_stg1, + (UWORD8 *)gau1_impeg2_zerobuf, + pu1_out_p, + 8, + 8, + u4_frame_width >> 1, + ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows); + } + ps_dec->u2_num_mbs_left--; + + + ps_dec->u2_mb_x++; + + if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset) + { + return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR; + } + else if ((ps_dec->u2_mb_x == 
ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset)) + { + ps_dec->u2_mb_x = 0; + ps_dec->u2_mb_y++; /* row complete: continue at the start of the next MB row */ + } + /* NOTE(review): if the row is complete but the START_CODE_PREFIX_LEN check fails, u2_mb_x stays equal to u2_num_horiz_mb; confirm another MB cannot be decoded in that state */ + } + while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0); /* loop until no MBs remain or the next 23 bits are all zero */ + return e_error; +} diff --git a/decoder/impeg2d_mc.c b/decoder/impeg2d_mc.c new file mode 100644 index 0000000..da13a8c --- /dev/null +++ b/decoder/impeg2d_mc.c @@ -0,0 +1,1373 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_mc.c +* +* @brief +* Contains MC function definitions for MPEG2 decoder +* +* @author +* Harish +* +* @par List of Functions: +* - impeg2d_motion_comp() +* - impeg2d_motion_comp_recon_buf() +* - impeg2d_mc_1mv() +* - impeg2d_mc_fw_or_bk_mb() +* - impeg2d_mc_frm_dual_prime() +* - impeg2d_mc_fld_dual_prime() +* - impeg2d_mc_4mv() +* - impeg2d_mc_2mv() +* - impeg2d_dec_intra_mb() +* - impeg2d_dec_skip_p_mb() +* - impeg2d_dec_skip_b_mb() +* - impeg2d_dec_skip_mbs() +* - impeg2d_dec_0mv_coded_mb() +* - impeg2d_mc_halfx_halfy() +* - impeg2d_mc_halfx_fully() +* - impeg2d_mc_fullx_halfy() +* - impeg2d_mc_fullx_fully() +* - impeg2d_set_mc_params() +* +* @remarks +* None +* +******************************************************************************* +*/ +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_globals.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_mv_dec.h" +#include "impeg2d_mc.h" + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_motion_comp */ +/* */ +/* Description : Perform motion compensation and store the resulting block*/ +/* in the buf */ +/* */ +/* Inputs : params - Parameters required to do motion compensation */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls appropriate functions depending on the mode of */ +/* compensation */ +/* */ +/* Outputs : buf - Buffer for the motion 
compensation result */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_motion_comp(dec_state_t *ps_dec, mb_mc_params_t *ps_params,yuv_buf_t *ps_buf) /* writes into ps_buf using the u4_dst_offset_res_buf and u4_dst_wd_res_buf fields */ +{ + + PROFILE_DISABLE_MC_RETURN; + + /* Perform motion compensation for Y */ + ps_dec->pf_mc[ps_params->s_luma.u4_mode]((void *)ps_dec, ps_params->s_ref.pu1_y + ps_params->s_luma.u4_src_offset, + ps_params->s_luma.u4_src_wd, + ps_buf->pu1_y + ps_params->s_luma.u4_dst_offset_res_buf, + ps_params->s_luma.u4_dst_wd_res_buf, + ps_params->s_luma.u4_cols, + ps_params->s_luma.u4_rows); + /* Perform motion compensation for U */ + ps_dec->pf_mc[ps_params->s_chroma.u4_mode]((void *)ps_dec, ps_params->s_ref.pu1_u + ps_params->s_chroma.u4_src_offset, + ps_params->s_chroma.u4_src_wd, + ps_buf->pu1_u + ps_params->s_chroma.u4_dst_offset_res_buf, + ps_params->s_chroma.u4_dst_wd_res_buf, + ps_params->s_chroma.u4_cols, + ps_params->s_chroma.u4_rows); + + /* Perform motion compensation for V */ + ps_dec->pf_mc[ps_params->s_chroma.u4_mode]((void *)ps_dec, ps_params->s_ref.pu1_v + ps_params->s_chroma.u4_src_offset, + ps_params->s_chroma.u4_src_wd, + ps_buf->pu1_v + ps_params->s_chroma.u4_dst_offset_res_buf, + ps_params->s_chroma.u4_dst_wd_res_buf, + ps_params->s_chroma.u4_cols, + ps_params->s_chroma.u4_rows); +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_motion_comp_recon_buf */ +/* */ +/* Description : Perform motion compensation for Y, U and V and store the */ +/* result in the destination buffer passed by the caller */ +/* */ +/* Inputs : ps_dec, ps_params - Decoder state and MC parameters */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls ps_dec->pf_mc[] selected by the luma and chroma */ +/* u4_mode of ps_params, once per plane */ +/* */ +/* Outputs : buf - Buffer for the motion compensation
result */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_motion_comp_recon_buf(dec_state_t *ps_dec, + mb_mc_params_t *ps_params, + yuv_buf_t *ps_dest_buf) /* destination uses the u4_dst_offset_cur_frm and u4_dst_wd_cur_frm fields */ +{ + + PROFILE_DISABLE_MC_RETURN; + + /* Perform motion compensation for Y */ + ps_dec->pf_mc[ps_params->s_luma.u4_mode](ps_dec, ps_params->s_ref.pu1_y + ps_params->s_luma.u4_src_offset, + ps_params->s_luma.u4_src_wd, + ps_dest_buf->pu1_y + ps_params->s_luma.u4_dst_offset_cur_frm, + ps_params->s_luma.u4_dst_wd_cur_frm, + ps_params->s_luma.u4_cols, + ps_params->s_luma.u4_rows); + + /* Perform motion compensation for U */ + + ps_dec->pf_mc[ps_params->s_chroma.u4_mode](ps_dec, ps_params->s_ref.pu1_u + ps_params->s_chroma.u4_src_offset, + ps_params->s_chroma.u4_src_wd, + ps_dest_buf->pu1_u + ps_params->s_chroma.u4_dst_offset_cur_frm, + ps_params->s_chroma.u4_dst_wd_cur_frm, + ps_params->s_chroma.u4_cols, + ps_params->s_chroma.u4_rows); + + /* Perform motion compensation for V */ + ps_dec->pf_mc[ps_params->s_chroma.u4_mode](ps_dec, ps_params->s_ref.pu1_v + ps_params->s_chroma.u4_src_offset, + ps_params->s_chroma.u4_src_wd, + ps_dest_buf->pu1_v + ps_params->s_chroma.u4_dst_offset_cur_frm, + ps_params->s_chroma.u4_dst_wd_cur_frm, + ps_params->s_chroma.u4_cols, + ps_params->s_chroma.u4_rows); +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_1mv */ +/* */ +/* Description : Motion compensate the single prediction selected by */ +/* ps_dec->e_mb_pred directly into ps_dec->s_dest_buf */ +/* */ +/* Inputs : ps_dec - Decoder state */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls impeg2d_motion_comp_recon_buf() with */ +/* as_mb_mc_params[e_mb_pred][FIRST] */ +/* */ +/* Outputs : ps_dec->s_dest_buf - motion compensated prediction */
+/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_1mv(dec_state_t *ps_dec) +{ + + impeg2d_motion_comp_recon_buf(ps_dec, &ps_dec->as_mb_mc_params[ps_dec->e_mb_pred][FIRST], &ps_dec->s_dest_buf); +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_fw_or_bk_mb */ +/* */ +/* Description : Motion compensate as_mb_mc_params[FORW][FIRST] and */ +/* [FORW][SECOND] directly into ps_dec->s_dest_buf */ +/* */ +/* Inputs : ps_dec - Decoder state */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls impeg2d_motion_comp_recon_buf() twice; only the */ +/* FORW slot is read (NOTE(review): confirm for backward) */ +/* */ +/* Outputs : ps_dec->s_dest_buf - motion compensated prediction */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_fw_or_bk_mb(dec_state_t *ps_dec) +{ + impeg2d_motion_comp_recon_buf(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_dest_buf); + impeg2d_motion_comp_recon_buf(ps_dec, &ps_dec->as_mb_mc_params[FORW][SECOND], &ps_dec->s_dest_buf); +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_frm_dual_prime */ +/* */ +/* Description : Motion compensate the FIRST and SECOND predictions of */ +/* FORW into s_mc_fw_buf and of BACK into s_mc_bk_buf */ +/* */ +/* Inputs : ps_dec - Decoder state */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls impeg2d_motion_comp() four times, then */ +/* ps_dec->pf_interpolate() on the two buffers */ +/* */ +/* Outputs : ps_dec->s_dest_buf - output of pf_interpolate */ +/* */ +/* 
Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_frm_dual_prime(dec_state_t *ps_dec) +{ + /************************************************************************/ + /* Perform Motion Compensation */ + /************************************************************************/ + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], &ps_dec->s_mc_bk_buf); + + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][SECOND], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][SECOND], &ps_dec->s_mc_bk_buf); + + + + ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,&ps_dec->s_mc_bk_buf,&ps_dec->s_dest_buf,ps_dec->u2_picture_width); +} + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_fld_dual_prime */ +/* */ +/* Description : Motion compensate as_mb_mc_params[FORW][FIRST] into */ +/* s_mc_fw_buf and [FORW][SECOND] into s_mc_bk_buf */ +/* */ +/* Inputs : ps_dec - Decoder state */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls impeg2d_motion_comp() twice, then */ +/* ps_dec->pf_interpolate() on the two buffers */ +/* */ +/* Outputs : ps_dec->s_dest_buf - output of pf_interpolate */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_fld_dual_prime(dec_state_t *ps_dec) +{ + /************************************************************************/ + /* Perform Motion Compensation */ + 
/************************************************************************/ + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][SECOND], &ps_dec->s_mc_bk_buf); + + + ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,&ps_dec->s_mc_bk_buf,&ps_dec->s_dest_buf,ps_dec->u2_picture_width); +} + + + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_4mv */ +/* */ +/* Description : Motion compensate the FIRST and SECOND predictions of */ +/* FORW into s_mc_fw_buf and of BACK into s_mc_bk_buf */ +/* */ +/* Inputs : ps_dec - Decoder state */ +/* */ +/* Globals : */ +/* */ +/* Processing : Calls impeg2d_motion_comp() four times, then */ +/* ps_dec->pf_interpolate() on the two buffers */ +/* */ +/* Outputs : ps_dec->s_dest_buf - output of pf_interpolate */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_4mv(dec_state_t *ps_dec) +{ + /************************************************************************/ + /* Perform Motion Compensation */ + /************************************************************************/ + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], &ps_dec->s_mc_bk_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][SECOND], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][SECOND], &ps_dec->s_mc_bk_buf); + + ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,&ps_dec->s_mc_bk_buf,&ps_dec->s_dest_buf,ps_dec->u2_picture_width); +} + +/*****************************************************************************/ +/* */ +/* Function Name : 
impeg2d_mc_2mv */ +/* */ +/* Description : Runs motion compensation for the FORW/FIRST and */ +/* BACK/FIRST entries of as_mb_mc_params */ +/* */ +/* Inputs : ps_dec - Decoder state; as_mb_mc_params is read */ +/* */ +/* Globals : */ +/* */ +/* Processing : impeg2d_motion_comp() fills s_mc_fw_buf and s_mc_bk_buf; */ +/* pf_interpolate() is then called on the two buffers */ +/* */ +/* Outputs : s_dest_buf - third argument handed to pf_interpolate() */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 14 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_2mv(dec_state_t *ps_dec) +{ + /************************************************************************/ + /* Perform Motion Compensation */ + /************************************************************************/ + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], &ps_dec->s_mc_bk_buf); + + ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,&ps_dec->s_mc_bk_buf,&ps_dec->s_dest_buf,ps_dec->u2_picture_width); +} + +/***************************************************************************** +* Function Name : impeg2d_dec_intra_mb +* +* Description : Intra MB setup: sets u2_cbp to 0x3F, then either decodes the +* concealment motion vector or resets the MV predictors +* Arguments : +* ps_dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_intra_mb(dec_state_t *ps_dec) +{ + + ps_dec->u2_cbp = 0x3F; /* 0x3F: the six low bits set */ + if(ps_dec->u2_concealment_motion_vectors) + { + + stream_t *ps_stream; + + ps_stream = &ps_dec->s_bit_stream; + /* Decode the concealment motion vector */ + impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[FORW][FIRST],ps_dec->ai2_mv[FORW][FIRST], + ps_dec->au2_f_code[FORW],0,ps_dec->u2_fld_pic); + + + /* Set the second motion vector predictor */ + ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] =
ps_dec->ai2_pred_mv[FORW][FIRST][MV_X]; + ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y]; + + /* Flush the marker bit */ + if(0 == (impeg2d_bit_stream_get(ps_stream,1))) + { + /* Ignore marker bit error */ + } + } + else + { + /* Reset the motion vector predictors */ + memset(ps_dec->ai2_pred_mv,0,sizeof(ps_dec->ai2_pred_mv)); + } +} + +/***************************************************************************** +* Function Name : impeg2d_dec_skip_p_mb +* +* Description : Performs decoding needed for Skipped MB encountered in +* P Pictures and B Pictures with previous MB not bi-predicted +* +* Arguments : ps_dec - Decoder state +* u4_num_of_mbs - Number of skipped MBs to process +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_skip_p_mb(dec_state_t *ps_dec, WORD32 u4_num_of_mbs) +{ + WORD16 *pi2_mv; + + e_mb_type_t e_mb_type; + mb_mc_params_t *ps_mc; + + + WORD32 i4_iter; + UWORD32 u4_dst_wd; + UWORD32 u4_dst_offset_x; + UWORD32 u4_dst_offset_y; + UWORD32 u4_frm_offset = 0; + yuv_buf_t s_dst; + + u4_dst_wd = ps_dec->u2_frame_width; + + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + { + u4_dst_wd <<= 1; /* not a frame picture: double the destination row stride */ + if(ps_dec->u2_picture_structure == BOTTOM_FIELD) + { + u4_frm_offset = ps_dec->u2_frame_width; /* bottom field: start one frame row further in */ + } + } + + for (i4_iter = u4_num_of_mbs; i4_iter > 0; i4_iter--) + { + if(ps_dec->u2_picture_structure == FRAME_PICTURE) + { + e_mb_type = MC_FRM_FW_AND_BK_2MV; + } + else + { + e_mb_type = MC_FLD_FW_AND_BK_2MV; + } + + ps_dec->u2_prev_intra_mb = 0; + pi2_mv = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]); + + /* P picture: reset the motion vector predictors and use a zero MV */ + if(ps_dec->e_pic_type == P_PIC) + { + memset(ps_dec->ai2_pred_mv,0,sizeof(ps_dec->ai2_pred_mv)); + pi2_mv[MV_X] = pi2_mv[MV_Y] = 0; + + ps_dec->u2_cbp = 0; + + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST]; + ps_mc = &ps_dec->as_mb_mc_params[FORW][FIRST]; + ps_mc->s_ref = ps_dec->as_ref_buf[ps_dec->e_mb_pred][ps_dec->u2_fld_parity]; + +
impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, e_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + + + u4_dst_offset_x = (ps_dec->u2_mb_x << 4) + u4_frm_offset; + u4_dst_offset_y = (ps_dec->u2_mb_y << 4) * u4_dst_wd; + + s_dst.pu1_y = ps_dec->s_cur_frm_buf.pu1_y + u4_dst_offset_x + u4_dst_offset_y; + + u4_dst_offset_x = u4_dst_offset_x >> 1; /* chroma: half of the luma x offset */ + u4_dst_offset_y = u4_dst_offset_y >> 2; /* chroma: quarter of the luma row offset */ + + s_dst.pu1_u = ps_dec->s_cur_frm_buf.pu1_u + u4_dst_offset_x + u4_dst_offset_y; + s_dst.pu1_v = ps_dec->s_cur_frm_buf.pu1_v + u4_dst_offset_x + u4_dst_offset_y; + + + ps_mc->s_ref.pu1_y += ps_mc->s_luma.u4_src_offset; + ps_mc->s_ref.pu1_u += ps_mc->s_chroma.u4_src_offset; + ps_mc->s_ref.pu1_v += ps_mc->s_chroma.u4_src_offset; + + ps_dec->pf_copy_mb(&ps_mc->s_ref, &s_dst, ps_mc->s_luma.u4_src_wd, u4_dst_wd); + } + + else /* not a P picture: reuse the predictor MV of direction e_mb_pred */ + { + pi2_mv[MV_X] = ps_dec->ai2_pred_mv[ps_dec->e_mb_pred][FIRST][MV_X]; + pi2_mv[MV_Y] = ps_dec->ai2_pred_mv[ps_dec->e_mb_pred][FIRST][MV_Y]; + + ps_dec->u2_cbp = 0; + + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST]; + ps_mc = &ps_dec->as_mb_mc_params[FORW][FIRST]; + ps_mc->s_ref = ps_dec->as_ref_buf[ps_dec->e_mb_pred][ps_dec->u2_fld_parity]; + + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, e_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + + u4_dst_offset_x = (ps_dec->u2_mb_x << 4) + u4_frm_offset; + u4_dst_offset_y = (ps_dec->u2_mb_y << 4) * u4_dst_wd; + + ps_mc->s_luma.u4_dst_offset_res_buf = u4_dst_offset_x + u4_dst_offset_y; + ps_mc->s_luma.u4_dst_wd_res_buf = u4_dst_wd; + + u4_dst_offset_x = u4_dst_offset_x >> 1; /* chroma: half of the luma x offset */ + u4_dst_offset_y = u4_dst_offset_y >> 2; /* chroma: quarter of the luma row offset */ + + ps_mc->s_chroma.u4_dst_offset_res_buf = u4_dst_offset_x + u4_dst_offset_y; + ps_mc->s_chroma.u4_dst_wd_res_buf = u4_dst_wd >> 1; + + impeg2d_motion_comp(ps_dec, ps_mc, &ps_dec->s_cur_frm_buf); + } + + +
/********************************************************************/ + /* Common MB processing tasks */ + /********************************************************************/ + ps_dec->u2_mb_x++; + ps_dec->u2_num_mbs_left--; + + if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset)) /* end of MB row, and offset + prefix length is still below max offset */ + { + ps_dec->u2_mb_x = 0; + ps_dec->u2_mb_y++; + } + } + +} + +/******************************************************************************* +* Function Name : impeg2d_dec_skip_b_mb +* +* Description : Performs processing needed for Skipped MB encountered in +* B Pictures with previous MB bi-predicted. +* +* Arguments : ps_dec - Decoder state +* u4_num_of_mbs - Number of skipped MBs to process +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_skip_b_mb(dec_state_t *ps_dec, WORD32 u4_num_of_mbs) +{ + + + WORD16 *pi2_mv; + + UWORD32 i; + e_mb_type_t e_mb_type; + mb_mc_params_t *ps_mc; + + WORD32 i4_iter; + UWORD32 u4_dst_wd; + yuv_buf_t s_dst; + UWORD32 u4_dst_offset_x; + UWORD32 u4_dst_offset_y; + UWORD32 u4_frm_offset = 0; + + u4_dst_wd = ps_dec->u2_frame_width; + s_dst = ps_dec->s_cur_frm_buf; + + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + { + u4_dst_wd <<= 1; /* not a frame picture: double the destination row stride */ + if(ps_dec->u2_picture_structure == BOTTOM_FIELD) + { + u4_frm_offset = ps_dec->u2_frame_width; /* bottom field: start one frame row further in */ + } + } + + for (i4_iter = u4_num_of_mbs; i4_iter > 0; i4_iter--) + { + ps_dec->u2_prev_intra_mb = 0; + + if(ps_dec->u2_picture_structure == FRAME_PICTURE) + { + e_mb_type = MC_FRM_FW_AND_BK_2MV; + } + else + { + e_mb_type = MC_FLD_FW_AND_BK_2MV; + } + + /************************************************************************/ + /* First motion vector: copy of the FORW/FIRST predictor */ + /************************************************************************/ + pi2_mv = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]); + { + pi2_mv[MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X]; + pi2_mv[MV_Y] =
ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y]; + } + /************************************************************************/ + /* Second motion vector: copy of the BACK/FIRST predictor */ + /************************************************************************/ + pi2_mv = (WORD16 *)&(ps_dec->ai2_mv[BACK][FIRST]); + { + pi2_mv[MV_X] = ps_dec->ai2_pred_mv[BACK][FIRST][MV_X]; + pi2_mv[MV_Y] = ps_dec->ai2_pred_mv[BACK][FIRST][MV_Y]; + } + ps_dec->u2_cbp = 0; + + for(i = 0; i < 2; i++) /* presumably i == FORW, then i == BACK -- TODO confirm the enum values */ + { + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[i][FIRST]; + ps_mc = &ps_dec->as_mb_mc_params[i][FIRST]; + ps_mc->s_ref = ps_dec->as_ref_buf[i][ps_dec->u2_fld_parity]; + + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, e_mb_type, 0, pi2_mv, ps_dec->u2_mb_x, + ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + } + + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf); + impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], &ps_dec->s_mc_bk_buf); + + u4_dst_offset_x = (ps_dec->u2_mb_x << 4) + u4_frm_offset; + u4_dst_offset_y = (ps_dec->u2_mb_y << 4) * u4_dst_wd; + + s_dst.pu1_y = ps_dec->s_cur_frm_buf.pu1_y + u4_dst_offset_x + u4_dst_offset_y; + + u4_dst_offset_x = u4_dst_offset_x >> 1; /* chroma: half of the luma x offset */ + u4_dst_offset_y = u4_dst_offset_y >> 2; /* chroma: quarter of the luma row offset */ + + s_dst.pu1_u = ps_dec->s_cur_frm_buf.pu1_u + u4_dst_offset_x + u4_dst_offset_y; + s_dst.pu1_v = ps_dec->s_cur_frm_buf.pu1_v + u4_dst_offset_x + u4_dst_offset_y; + + ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,&ps_dec->s_mc_bk_buf,&s_dst, u4_dst_wd); +// dec->pf_copy_mb(&dec->mc_buf, &dst, MB_SIZE, dst_wd); + + /********************************************************************/ + /* Common MB processing tasks */ + /********************************************************************/ + ps_dec->u2_mb_x++; + ps_dec->u2_num_mbs_left--; + + if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+ { + ps_dec->u2_mb_x = 0; + ps_dec->u2_mb_y++; + } + } +} +/******************************************************************************* +* Function Name : impeg2d_dec_skip_mbs +* +* Description : Handles a run of skipped MBs: calls impeg2d_dec_skip_b_mb +* when e_mb_pred is BIDIRECT, else impeg2d_dec_skip_p_mb, +* then resets the default DC predictors +* Arguments : ps_dec - Decoder state +* u2_num_skip_mbs - Number of skipped MBs +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_skip_mbs(dec_state_t *ps_dec, UWORD16 u2_num_skip_mbs) +{ + PROFILE_DISABLE_SKIP_MB(); + + if(ps_dec->e_mb_pred == BIDIRECT) + { + impeg2d_dec_skip_b_mb(ps_dec, u2_num_skip_mbs); + } + else + { + impeg2d_dec_skip_p_mb(ps_dec, u2_num_skip_mbs); + } + + ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; +} + + + + +/***************************************************************************** +* Function Name : impeg2d_dec_0mv_coded_mb +* +* Description : Decodes the MB with 0 MV but coded.
This can occur in P +* pictures only +* +* Arguments : +* ps_dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_0mv_coded_mb(dec_state_t *ps_dec) +{ + + + WORD16 *pi2_mv; + e_mb_type_t e_mb_type; + mb_mc_params_t *ps_mc; + + if(ps_dec->u2_picture_structure == FRAME_PICTURE) + { + e_mb_type = MC_FRM_FW_AND_BK_2MV; + } + else + { + e_mb_type = MC_FLD_FW_AND_BK_2MV; + } + + + + + /* Reset the motion vector predictors */ + memset(ps_dec->ai2_pred_mv,0,sizeof(ps_dec->ai2_pred_mv)); + + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST]; + ps_mc = &ps_dec->as_mb_mc_params[FORW][FIRST]; + ps_mc->s_ref = ps_dec->as_ref_buf[FORW][ps_dec->u2_fld_parity]; + + pi2_mv[MV_X] = 0; + pi2_mv[MV_Y] = 0; + + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, e_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_halfx_halfy() */ +/* */ +/* Description : Gets the buffer from (0.5,0.5) to (8.5,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block */ +/* will be extracted.
*/ +/* ref_wid - Width of reference frame */ +/* out_wid - Width of the output frame */ +/* blk_width - width of the block */ +/* blk_height - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0),(1,0),(0,1),(1,1) position in */ +/* the ref frame. Interpolate these four values to get the */ +/* value at (0.5,0.5). Repeat this to get an 8 x 8 block */ +/* using 9 x 9 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_halfx_halfy(void *pv_dec, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD8 *pu1_out, + UWORD32 u4_out_wid, + UWORD32 u4_blk_width, + UWORD32 u4_blk_height) +{ + UWORD8 *pu1_out_ptr,*pu1_ref_ptr; + dec_state_t *ps_dec = (dec_state_t *)pv_dec; + + pu1_out_ptr = pu1_out; + pu1_ref_ptr = pu1_ref; + + if((u4_blk_width == MB_SIZE) && (u4_blk_height == MB_SIZE)) + { + + /* luma 16 x 16: four calls of the 8x8 kernel, one per quadrant */ + + /*block 0*/ + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block1*/ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 2*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid; + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 3*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid + BLK_SIZE; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid + BLK_SIZE; + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + + + + } + else if ((u4_blk_width == BLK_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /*chroma 8 x 8*/ +
ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + } + else if ((u4_blk_width == MB_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /* 16 x 8: block 0 is the left 8x8 half */ + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /* block 1 is the right 8x8 half */ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_halfx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + } + + else + { + UWORD8 *ref_p0,*ref_p1,*ref_p2,*ref_p3; + UWORD32 i,j; + /* P0-P3 are the pixels in the reference frame and Q is the value being */ + /* estimated as (P0 + P1 + P2 + P3 + 2) >> 2 */ + /* + P0 P1 + Q + P2 P3 + */ + + ref_p0 = pu1_ref; + ref_p1 = pu1_ref + 1; + ref_p2 = pu1_ref + u4_ref_wid; + ref_p3 = pu1_ref + u4_ref_wid + 1; + + for(i = 0; i < u4_blk_height; i++) + { + for(j = 0; j < u4_blk_width; j++) + { + *pu1_out++ = (( (*ref_p0++ ) + + (*ref_p1++ ) + + (*ref_p2++ ) + + (*ref_p3++ ) + 2 ) >> 2); + } + ref_p0 += u4_ref_wid - u4_blk_width; + ref_p1 += u4_ref_wid - u4_blk_width; + ref_p2 += u4_ref_wid - u4_blk_width; + ref_p3 += u4_ref_wid - u4_blk_width; + + pu1_out += u4_out_wid - u4_blk_width; + } + } + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_halfx_fully() */ +/* */ +/* Description : Gets the buffer from (0.5,0) to (8.5,8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block */ +/* will be extracted.
*/ +/* ref_wid - Width of reference frame */ +/* out_wid - Width of the output frame */ +/* blk_width - width of the block */ +/* blk_height - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (1,0) position in the ref frame */ +/* Interpolate these two values to get the value at (0.5,0) */ +/* Repeat this to get an 8 x 8 block using 9 x 8 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ + +void impeg2d_mc_halfx_fully(void *pv_dec, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD8 *pu1_out, + UWORD32 u4_out_wid, + UWORD32 u4_blk_width, + UWORD32 u4_blk_height) +{ + UWORD8 *pu1_out_ptr,*pu1_ref_ptr; + dec_state_t *ps_dec = (dec_state_t *)pv_dec; + + pu1_out_ptr = pu1_out; + pu1_ref_ptr = pu1_ref; + + if((u4_blk_width == MB_SIZE) && (u4_blk_height == MB_SIZE)) + { + + /* luma 16 x 16: four calls of the 8x8 kernel, one per quadrant */ + + /*block 0*/ + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block1*/ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 2*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid; + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 3*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid + BLK_SIZE; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid + BLK_SIZE; + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + + + + } + else if ((u4_blk_width == BLK_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /*chroma 8 x 8*/ +
ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + } + else if ((u4_blk_width == MB_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /* 16 x 8: block 0 is the left 8x8 half */ + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /* block 1 is the right 8x8 half */ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_halfx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + } + + else + { + UWORD8 *ref_p0,*ref_p1; + UWORD32 i,j; + + /* P0 and P1 are the pixels in the reference frame and Q is the value */ + /* being estimated as (P0 + P1 + 1) >> 1 */ + /* + P0 Q P1 + */ + + ref_p0 = pu1_ref; + ref_p1 = pu1_ref + 1; + + for(i = 0; i < u4_blk_height; i++) + { + for(j = 0; j < u4_blk_width; j++) + { + *pu1_out++ = ((( *ref_p0++ ) + + (*ref_p1++) + 1 ) >> 1); + } + ref_p0 += u4_ref_wid - u4_blk_width; + ref_p1 += u4_ref_wid - u4_blk_width; + + pu1_out += u4_out_wid - u4_blk_width; + } + } + return; +} + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_fullx_halfy() */ +/* */ +/* Description : Gets the buffer from (0,0.5) to (8,8.5) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block */ +/* will be extracted.
*/ +/* ref_wid - Width of reference frame */ +/* out_wid - Width of the output frame */ +/* blk_width - width of the block */ +/* blk_height - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) and (0,1) position in the ref frame */ +/* Interpolate these two values to get the value at (0,0.5) */ +/* Repeat this to get an 8 x 8 block using 8 x 9 block from */ +/* reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +void impeg2d_mc_fullx_halfy(void *pv_dec, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD8 *pu1_out, + UWORD32 u4_out_wid, + UWORD32 u4_blk_width, + UWORD32 u4_blk_height) +{ + + UWORD8 *pu1_out_ptr,*pu1_ref_ptr; + dec_state_t *ps_dec = (dec_state_t *)pv_dec; + pu1_out_ptr = pu1_out; + pu1_ref_ptr = pu1_ref; + + if((u4_blk_width == MB_SIZE) && (u4_blk_height == MB_SIZE)) + { + + /* luma 16 x 16: four calls of the 8x8 kernel, one per quadrant */ + + /*block 0*/ + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block1*/ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 2*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid; + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 3*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid + BLK_SIZE; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid + BLK_SIZE; + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + + + + } + else if ((u4_blk_width == BLK_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /*chroma 8 x 8*/ +
ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + } + else if ((u4_blk_width == MB_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /* 16 x 8: block 0 is the left 8x8 half */ + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /* block 1 is the right 8x8 half */ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_fullx_halfy_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + } + + else if ((u4_blk_width == BLK_SIZE) && (u4_blk_height == (BLK_SIZE / 2))) /* NOTE(review): the sibling halfx_halfy, halfx_fully and fullx_fully routines use a plain else here; with this test any other block size writes nothing -- confirm that is intended */ + { + UWORD8 *ref_p0,*ref_p1; + UWORD32 i,j; + /* P0 and P1 are the pixels in the reference frame and x is the value */ + /* being estimated as (P0 + P1 + 1) >> 1 */ + /* + P0 + x + P1 + */ + ref_p0 = pu1_ref; + ref_p1 = pu1_ref + u4_ref_wid; + + for(i = 0; i < u4_blk_height; i++) + { + for(j = 0; j < u4_blk_width; j++) + { + *pu1_out++ = ((( *ref_p0++) + + (*ref_p1++) + 1 ) >> 1); + } + ref_p0 += u4_ref_wid - u4_blk_width; + ref_p1 += u4_ref_wid - u4_blk_width; + + pu1_out += u4_out_wid - u4_blk_width; + } + } + return; +} + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_mc_fullx_fully() */ +/* */ +/* Description : Gets the buffer from (x,y) to (x+8,y+8) */ +/* and the above block of size 8 x 8 will be placed as a */ +/* block from the current position of out_buf */ +/* */ +/* Inputs : ref - Reference frame from which the block */ +/* will be extracted.
*/ +/* ref_wid - Width of reference frame */ +/* out_wid - Width of the output frame */ +/* blk_width - width of the block */ +/* blk_height - height of the block */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Point to the (0,0) position in the ref frame */ +/* Get an 8 x 8 block from reference frame */ +/* */ +/* Outputs : out - Output containing the extracted block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ + +void impeg2d_mc_fullx_fully(void *pv_dec, + UWORD8 *pu1_ref, + UWORD32 u4_ref_wid, + UWORD8 *pu1_out, + UWORD32 u4_out_wid, + UWORD32 u4_blk_width, + UWORD32 u4_blk_height) +{ + + UWORD8 *pu1_out_ptr,*pu1_ref_ptr; + dec_state_t *ps_dec = (dec_state_t *)pv_dec; + + pu1_out_ptr = pu1_out; + pu1_ref_ptr = pu1_ref; + + if((u4_blk_width == MB_SIZE) && (u4_blk_height == MB_SIZE)) + { + + /* luma 16 x 16: four calls of the 8x8 kernel, one per quadrant */ + + /*block 0*/ + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block1*/ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 2*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid; + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /*block 3*/ + pu1_out_ptr = pu1_out + BLK_SIZE * u4_out_wid + BLK_SIZE; + pu1_ref_ptr = pu1_ref + BLK_SIZE * u4_ref_wid + BLK_SIZE; + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + + + + } + else if ((u4_blk_width == BLK_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /*chroma 8 x 8*/ + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + } + else if ((u4_blk_width == MB_SIZE) && (u4_blk_height == BLK_SIZE)) + { + /* 16 x 8: block 0 is the left 8x8 half */ +
ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + /* block 1 is the right 8x8 half */ + pu1_out_ptr = (pu1_out + BLK_SIZE); + pu1_ref_ptr = (pu1_ref + BLK_SIZE); + ps_dec->pf_fullx_fully_8x8(pu1_out_ptr,pu1_ref_ptr,u4_ref_wid,u4_out_wid); + + } + else + { + UWORD32 i; + + for(i = 0; i < u4_blk_height; i++) + { + memcpy(pu1_out, pu1_ref, u4_blk_width); + pu1_ref += u4_ref_wid; + pu1_out += u4_out_wid; + } + } + return; +} + +/******************************************************************************* +* Function Name : impeg2d_set_mc_params +* +* Description : Sets the parameters for Motion Compensation +* +* Arguments : +* luma : Parameters for luma blocks (filled in) +* chroma : Parameters for chroma blocks (filled in) +* type : Motion compensation type (first index of the constant tables) +* mv_num : Motion vector index (second index of the constant tables) +* mv : Motion vector, indexed by MV_X and MV_Y +* mb_x : X co-ordinate of MB +* mb_y : Y co-ordinate of MB +* frm_wd : Width of the frame +* frm_ht, picture_width : Frame height; width used for the cur_frm fields +* Values Returned : None +*******************************************************************************/ +void impeg2d_set_mc_params(comp_mc_params_t *ps_luma, + comp_mc_params_t *ps_chroma, + e_mb_type_t e_type, + UWORD16 u2_mv_num, + WORD16 ai2_mv[], + UWORD16 u2_mb_x, + UWORD16 u2_mb_y, + UWORD16 u2_frm_wd, + UWORD16 u2_frm_ht, + UWORD16 u2_picture_width) +{ + WORD16 i2_mvy_round; + WORD16 i2_mvx_round; + const mc_type_consts_t *ps_mc_params; + WORD16 i2_mvx_fullp_round; + WORD16 i2_mvy_fullp_round; + UWORD32 u4_frm_chroma_wd; + WORD16 i2_pix_x, i2_pix_y; + + ps_mc_params = &gas_impeg2d_mc_params_luma[e_type][u2_mv_num]; + /****************************************************************************/ + /* get luma mc params */ + /****************************************************************************/ + i2_pix_x = MB_SIZE * u2_mb_x + (ai2_mv[MV_X]>>1); + i2_pix_y = (MB_SIZE * u2_mb_y + + (ai2_mv[MV_Y]>>1) * ps_mc_params->mvy_cf + u2_mv_num * ps_mc_params->mv_num_cf) * ps_mc_params->frm_wd_cf; + + // clip pix_x and pix_y so that they fall inside the frame boundary +
CLIP(i2_pix_x, (u2_frm_wd-16), 0); + CLIP(i2_pix_y, (u2_frm_ht-16), 0); + + ps_luma->u4_src_offset = i2_pix_x + i2_pix_y * u2_frm_wd; + + + /* keep offset in full pel */ + ps_luma->u4_rows = ps_mc_params->rows; + ps_luma->u4_cols = MB_SIZE; + ps_luma->u4_dst_wd_res_buf = ps_mc_params->dst_wd; + ps_luma->u4_src_wd = u2_frm_wd * ps_mc_params->src_wd_cf; + ps_luma->u4_dst_offset_res_buf = ps_mc_params->dst_offset_scale * MB_SIZE; + ps_luma->u4_dst_offset_cur_frm = ps_mc_params->dst_offset_scale * u2_picture_width; + ps_luma->u4_mode = ((ai2_mv[MV_X] & 1) << 1) | (ai2_mv[MV_Y] & 1); /* bit 1 = LSB of the x MV, bit 0 = LSB of the y MV */ + + /****************************************************************************/ + /* get chroma mc params */ + /****************************************************************************/ + ps_mc_params = &gas_impeg2d_mc_params_chroma[e_type][u2_mv_num]; + i2_mvx_round = ((ai2_mv[MV_X] + IS_NEG(ai2_mv[MV_X]))>>1); + i2_mvy_round = ((ai2_mv[MV_Y] + IS_NEG(ai2_mv[MV_Y]))>>1); + + i2_mvx_fullp_round = (i2_mvx_round>>1); + i2_mvy_fullp_round = (i2_mvy_round>>1)*ps_mc_params->mvy_cf; + + u4_frm_chroma_wd = (u2_frm_wd>>1); + + i2_pix_x = (MB_SIZE/2) * u2_mb_x + i2_mvx_fullp_round; + i2_pix_y = ((MB_SIZE/2) * u2_mb_y + i2_mvy_fullp_round + u2_mv_num * + ps_mc_params->mv_num_cf)*ps_mc_params->frm_wd_cf; + + CLIP(i2_pix_x, ((u2_frm_wd / 2)-8), 0); + CLIP(i2_pix_y, ((u2_frm_ht / 2)-8), 0); + ps_chroma->u4_src_offset = i2_pix_x + i2_pix_y * u4_frm_chroma_wd; + + + /* keep offset in full pel */ + ps_chroma->u4_rows = ps_mc_params->rows; + ps_chroma->u4_cols = (MB_SIZE >> 1); + ps_chroma->u4_dst_wd_res_buf = ps_mc_params->dst_wd; + ps_chroma->u4_src_wd = (u2_frm_wd>>1) * ps_mc_params->src_wd_cf; + ps_chroma->u4_dst_offset_res_buf = ps_mc_params->dst_offset_scale * MB_CHROMA_SIZE; + ps_chroma->u4_dst_offset_cur_frm = ps_mc_params->dst_offset_scale * (u2_picture_width >> 1); + ps_chroma->u4_mode = ((i2_mvx_round & 1) << 1) | (i2_mvy_round & 1); /* same bit layout as the luma mode, from the rounded chroma MV */ + + + + ps_luma->u4_dst_wd_cur_frm = u2_picture_width; +
ps_chroma->u4_dst_wd_cur_frm = u2_picture_width >> 1; + + if(ps_luma->u4_dst_wd_res_buf == MB_SIZE * 2) /* table gave a result-buffer width of two MBs: override the cur_frm widths */ + { + ps_luma->u4_dst_wd_cur_frm = u2_frm_wd << 1; + ps_chroma->u4_dst_wd_cur_frm = u2_frm_wd; + } +} + + diff --git a/decoder/impeg2d_mc.h b/decoder/impeg2d_mc.h new file mode 100644 index 0000000..14f1ef9 --- /dev/null +++ b/decoder/impeg2d_mc.h @@ -0,0 +1,78 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_mc.h +* +* @brief +* Contains MC function declarations for MPEG2 codec +* +* @author +* Harish +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __IMPEG2D_MC_H__ +#define __IMPEG2D_MC_H__ + +void impeg2d_dec_2mv_interp_mb(dec_state_t *dec); +void impeg2d_dec_4mv_mb(dec_state_t *dec); + + +void impeg2d_dec_1mv_mb(dec_state_t *dec); +void impeg2d_dec_2mv_fw_or_bk_mb(dec_state_t *dec); +void impeg2d_dec_fld_dual_prime(dec_state_t *dec); +void impeg2d_dec_frm_dual_prime(dec_state_t *dec); + +void impeg2d_mc_1mv(dec_state_t *dec); +void impeg2d_mc_fw_or_bk_mb(dec_state_t *dec); +void impeg2d_mc_fld_dual_prime(dec_state_t *dec); +void impeg2d_mc_frm_dual_prime(dec_state_t *dec); +void impeg2d_mc_4mv(dec_state_t *dec); +void impeg2d_mc_2mv(dec_state_t *dec); + +void impeg2d_dec_skip_mbs(dec_state_t *dec, UWORD16 num_skip_mbs); +void impeg2d_dec_0mv_coded_mb(dec_state_t *dec); +void impeg2d_dec_intra_mb(dec_state_t *dec); + +void impeg2d_set_mc_params(comp_mc_params_t *luma, + comp_mc_params_t *chroma, + e_mb_type_t type, + UWORD16 mv_num, + WORD16 mv[], + UWORD16 mb_x, + UWORD16 mb_y, + UWORD16 frm_wd, + UWORD16 frm_ht, + UWORD16 picture_width); + +void impeg2d_motion_comp(dec_state_t *dec, mb_mc_params_t *params,yuv_buf_t *buf); + +pf_mc_t impeg2d_mc_halfx_halfy; +pf_mc_t impeg2d_mc_halfx_fully; +pf_mc_t impeg2d_mc_fullx_halfy; +pf_mc_t impeg2d_mc_fullx_fully; + + +#endif /* __IMPEG2D_MC_H__*/ diff --git a/decoder/impeg2d_mv_dec.c b/decoder/impeg2d_mv_dec.c new file mode 100644 index 0000000..1a30146 --- /dev/null +++ b/decoder/impeg2d_mv_dec.c @@ -0,0 +1,499 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you 
may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include <stdio.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_globals.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_mv_dec.h" +#include "impeg2d_mc.h" + +/******************************************************************************* +* Function name : impeg2d_dec_1mv +* +* Description : Decodes a motion vector and updates the predictors +* +* Arguments : +* stream : Bitstream +* predMv : Prediction for the motion vectors +* mv : Motion vectors +* fCode : fcode to the used for the decoding +* shift : Shift value to be used. 
This will be equal to
*                   (mv_format == "field") && (picture_structure == "Frame picture")
*                   It is applied to the vertical component only.
*  dmv           : Optional (may be NULL). When non-NULL, one dmvector
*                  value is read after each component and stored here.
*  i             : Loop index used inside the function,
*                  0 - MV_X and 1 - MV_Y
*
*  Value Returned: None
*******************************************************************************/
INLINE void impeg2d_dec_1mv(stream_t *ps_stream, WORD16 ai2_pred_mv[], WORD16 ai2_mv[],UWORD16 au2_fCode[],
           UWORD16 u2_mv_y_shift, WORD16 ai2_dmv[])
{
    WORD16  i2_f;
    WORD16  i2_r_size;
    WORD16  i2_high,i2_low,i2_range;
    WORD16  i2_delta;
    WORD32  i;
    WORD32  ai4_shifts[2];
    UWORD32 u4_buf;
    UWORD32 u4_buf_nxt;
    UWORD32 u4_offset;
    UWORD32 *pu4_buf_aligned;

    /* Only the vertical component is scaled by the caller supplied shift */
    ai4_shifts[0] = 0;
    ai4_shifts[1] = u2_mv_y_shift;

    GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,ps_stream)
    for(i = 0; i < 2; i++)
    {
        WORD32  i4_shift = ai4_shifts[i];
        UWORD32 u4_mv_code;

        /* Decode the motion_code */
        IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_mv_code, MV_CODE_LEN)
        if(((u4_mv_code >> (MV_CODE_LEN - 1)) & 0x01) == 1) /* mvCode == 0 */
        {
            /* Zero delta: the vector is the (scaled) predictor itself */
            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,1,pu4_buf_aligned)

            ai2_mv[i] = (ai2_pred_mv[i] >> i4_shift);
        }
        else
        {
            UWORD16 u2_index;
            UWORD16 u2_value;
            UWORD16 u2_mv_len;
            UWORD16 u2_abs_mvcode_minus1;
            UWORD16 u2_sign_bit;

            i2_r_size = au2_fCode[i] - 1;
            if(i2_r_size < 0)
            {
                /* f_code == 0 would give a negative shift count below,   */
                /* which is undefined behaviour. Treat it as the smallest */
                /* range instead of shifting by -1.                       */
                i2_r_size = 0;
            }
            i2_f      = 1 << i2_r_size;
            i2_high   = (16 * i2_f) - 1;
            i2_low    = ((-16) * i2_f);
            i2_range  = (32 * i2_f);

            /* Table entry packs the code length in the low nibble and */
            /* (|motion_code| - 1) in the high byte                    */
            u2_index             = (u4_mv_code >> 1) & 0x1FF;
            u2_value             = gau2_impeg2d_mv_code[u2_index];
            u2_mv_len            = (u2_value & 0x0F);
            u2_abs_mvcode_minus1 = (u2_value >> 8) & 0x0FF;
            u4_mv_code         >>= (MV_CODE_LEN - u2_mv_len - 1);
            u2_sign_bit          = u4_mv_code & 0x1;

            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,(u2_mv_len + 1),pu4_buf_aligned)
            i2_delta = u2_abs_mvcode_minus1 * i2_f + 1;
            if(i2_r_size)
            {
                /* motion_residual: r_size extra bits */
                UWORD32 val;
                IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, val, pu4_buf_aligned, i2_r_size)
                i2_delta += val;
            }

            if(u2_sign_bit)
                i2_delta = -i2_delta;

            ai2_mv[i] = (ai2_pred_mv[i] >> i4_shift) + i2_delta;

            /* Wrap the vector back into [low, high] */
            if(ai2_mv[i] < i2_low)
            {
                ai2_mv[i] += i2_range;
            }

            if(ai2_mv[i] > i2_high)
            {
                ai2_mv[i] -= i2_range;
            }
        }

        /* Update the predictor. A multiply is used instead of '<<' because */
        /* left shifting a negative value is undefined behaviour in C.      */
        ai2_pred_mv[i] = (WORD16)(ai2_mv[i] * (1 << i4_shift));

        if(ai2_dmv)
        {
            UWORD32 u4_val;
            ai2_dmv[i] = 0;
            IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, u4_val, pu4_buf_aligned, 1)
            if(u4_val)
            {
                IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, u4_val, pu4_buf_aligned, 1)
                ai2_dmv[i] = gai2_impeg2d_dec_mv[u4_val];
            }
        }
    }
    PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)

}
/*******************************************************************************
*  Function name : impeg2d_dec_mv
*
*  Description   : Optionally reads the field select bit, then decodes a
*                  motion vector and updates the predictors
*
*  Arguments     :
*  stream        : Bitstream
*  predMv        : Prediction for the motion vectors
*  mv            : Motion vectors
*  fCode         : f_code to be used for the decoding
*  shift         : Shift value to be used.
This will be equal to +* (mv_format == "field") && (picture_structure == "Frame picture") +* +* Value Returned: None +*******************************************************************************/ +e_field_t impeg2d_dec_mv(stream_t *ps_stream, WORD16 ai2_pred_mv[], WORD16 ai2_mv[],UWORD16 au2_f_code[], + UWORD16 u2_shift, UWORD16 u2_fld_sel) +{ + e_field_t e_fld; + if(u2_fld_sel) + { + e_fld = (e_field_t)impeg2d_bit_stream_get_bit(ps_stream); + } + else + { + e_fld = TOP; + } + + impeg2d_dec_1mv(ps_stream,ai2_pred_mv,ai2_mv,au2_f_code,u2_shift,NULL); + + return(e_fld); +} + +/***************************************************************************** +* Function Name : impeg2d_dec_1mv_mb +* +* Description : Decodes mc params for 1 MV MB +* +* Arguments : +* dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_1mv_mb(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + e_field_t e_fld; + mb_mc_params_t *ps_mc; + e_pred_direction_t e_ref_pic; + + + ps_stream = &ps_dec->s_bit_stream; + e_ref_pic = ps_dec->e_mb_pred; + /************************************************************************/ + /* Decode the motion vector */ + /************************************************************************/ + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST]; + e_fld = impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[e_ref_pic][FIRST],pi2_mv, + ps_dec->au2_f_code[e_ref_pic],0, ps_dec->u2_fld_pic); + + ps_dec->ai2_pred_mv[e_ref_pic][SECOND][MV_X] = ps_dec->ai2_pred_mv[e_ref_pic][FIRST][MV_X]; + ps_dec->ai2_pred_mv[e_ref_pic][SECOND][MV_Y] = ps_dec->ai2_pred_mv[e_ref_pic][FIRST][MV_Y]; + /************************************************************************/ + /* Set the motion vector params */ + /************************************************************************/ + ps_mc = &ps_dec->as_mb_mc_params[e_ref_pic][FIRST]; + ps_mc->s_ref = 
ps_dec->as_ref_buf[e_ref_pic][e_fld]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + +} + +/***************************************************************************** +* Function Name : impeg2d_dec_2mv_fw_or_bk_mb +* +* Description : Decodes first part of params for 2 MV Interpolated MB +* +* Arguments : +* dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_2mv_fw_or_bk_mb(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + e_field_t e_fld; + mb_mc_params_t *ps_mc; + e_pred_direction_t e_ref_pic; + UWORD16 i; + + ps_stream = &ps_dec->s_bit_stream; + e_ref_pic = ps_dec->e_mb_pred; + for(i = 0; i < 2; i++) + { + /********************************************************************/ + /* Decode the first motion vector */ + /********************************************************************/ + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][i]; + e_fld = impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[e_ref_pic][i],pi2_mv, + ps_dec->au2_f_code[e_ref_pic],ps_dec->u2_frm_pic, 1); + + /********************************************************************/ + /* Set the motion vector params */ + /********************************************************************/ + ps_mc = &ps_dec->as_mb_mc_params[FORW][i]; + ps_mc->s_ref = ps_dec->as_ref_buf[e_ref_pic][e_fld]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type, i, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + } +} + +/***************************************************************************** +* Function Name : impeg2d_dec_frm_dual_prime +* +* Description : Decodes first part of params for 2 MV Interpolated MB +* +* Arguments : +* dec : Decoder state +* +* Values 
Returned : None +*****************************************************************************/ +void impeg2d_dec_frm_dual_prime(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + mb_mc_params_t *ps_mc; + + WORD16 ai2_dmv[2]; + WORD16 *pi2_mv1, *pi2_mv2, *pi2_mv3, *pi2_mv4; + UWORD16 i,j; + + pi2_mv1 = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]); + pi2_mv2 = (WORD16 *)&(ps_dec->ai2_mv[FORW][SECOND]); + pi2_mv3 = (WORD16 *)&(ps_dec->ai2_mv[BACK][FIRST]); + pi2_mv4 = (WORD16 *)&(ps_dec->ai2_mv[BACK][SECOND]); + + + + ps_stream = &ps_dec->s_bit_stream; + + /************************************************************************/ + /* Decode the motion vector MV_X, MV_Y and dmv[0], dmv[1] */ + /************************************************************************/ + impeg2d_dec_1mv(ps_stream,ps_dec->ai2_pred_mv[FORW][FIRST],pi2_mv1,ps_dec->au2_f_code[FORW],ps_dec->u2_frm_pic,ai2_dmv); + + { + WORD16 ai2_m[2][2]; + + if(ps_dec->u2_top_field_first) + { + ai2_m[1][0] = 1; + ai2_m[0][1] = 3; + } + else + { + ai2_m[1][0] = 3; + ai2_m[0][1] = 1; + } + + pi2_mv2[MV_X] = pi2_mv1[MV_X]; + pi2_mv2[MV_Y] = pi2_mv1[MV_Y]; + + pi2_mv3[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X] * ai2_m[1][0]); + pi2_mv4[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X] * ai2_m[0][1]); + + pi2_mv3[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y] * ai2_m[1][0]) - 1; + pi2_mv4[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y] * ai2_m[0][1]) + 1; + } + + ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X]; + ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y]; + + /************************************************************************/ + /* Set the motion vector params */ + /************************************************************************/ + for(j = 0; j < 2; j++) + { + for(i = 0; i < 2; i++) + { + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[j][i]; + ps_mc = &ps_dec->as_mb_mc_params[j][i]; + ps_mc->s_ref = ps_dec->as_ref_buf[FORW][(i ^ j) & 
1]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type, i, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + } + } + +} +/***************************************************************************** +* Function Name : impeg2d_dec_fld_dual_prime +* +* Description : Decodes first part of params for 2 MV Interpolated MB +* +* Arguments : +* dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_fld_dual_prime(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + mb_mc_params_t *ps_mc; + + WORD16 *pi2_mv1, *pi2_mv2; + WORD16 ai2_dmv[2]; + + + pi2_mv1 = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]); + pi2_mv2 = (WORD16 *)&(ps_dec->ai2_mv[FORW][SECOND]); + ps_stream = &ps_dec->s_bit_stream; + + /************************************************************************/ + /* Decode the motion vector MV_X, MV_Y and dmv[0], dmv[1] */ + /************************************************************************/ + impeg2d_dec_1mv(ps_stream,ps_dec->ai2_pred_mv[FORW][FIRST],pi2_mv1,ps_dec->au2_f_code[FORW],0,ai2_dmv); + + + pi2_mv2[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X]); + pi2_mv2[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y]); + + if(ps_dec->u2_picture_structure == TOP_FIELD) + pi2_mv2[MV_Y] -= 1; + else + pi2_mv2[MV_Y] += 1; + + ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X]; + ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y]; + + /************************************************************************/ + /* Set the motion vector params */ + /************************************************************************/ + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][0]; + ps_mc = &ps_dec->as_mb_mc_params[FORW][0]; + ps_mc->s_ref = ps_dec->as_ref_buf[FORW][ps_dec->u2_fld_parity]; + impeg2d_set_mc_params(&ps_mc->s_luma, 
&ps_mc->s_chroma, ps_dec->s_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][1]; + ps_mc = &ps_dec->as_mb_mc_params[FORW][1]; + ps_mc->s_ref = ps_dec->as_ref_buf[FORW][!ps_dec->u2_fld_parity]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type, 0, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + + +} +/***************************************************************************** +* Function Name : impeg2d_dec_4mv_mb +* +* Description : Decodes first part of params for 2 MV Interpolated MB +* +* Arguments : +* dec : Decoder state +* +* Values Returned : None +*****************************************************************************/ +void impeg2d_dec_4mv_mb(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + e_field_t e_fld; + mb_mc_params_t *ps_mc; + + UWORD16 i,j; + + ps_stream = &ps_dec->s_bit_stream; + + /***********************************************/ + /* loop for FW & BK */ + /***********************************************/ + for(j = 0; j < 2; j++) + { + /***********************************************/ + /* loop for decoding 2 mvs of same reference frame*/ + /***********************************************/ + for(i = 0; i < 2; i++) + { + /****************************************************************/ + /* Decode the first motion vector */ + /****************************************************************/ + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[j][i]; + e_fld = impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[j][i],pi2_mv, + ps_dec->au2_f_code[j],ps_dec->u2_frm_pic, 1); + + /****************************************************************/ + /* Set the motion vector params */ + /****************************************************************/ + ps_mc = &ps_dec->as_mb_mc_params[j][i]; + ps_mc->s_ref = 
ps_dec->as_ref_buf[j][e_fld]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type, i, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + } + } + +} +/******************************************************************************* +* Function Name : impeg2d_dec_2mv_interp_mb +* +* Description : Decodes first part of params for 2 MV Interpolated MB +* +* Arguments : +* dec : Decoder state +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_dec_2mv_interp_mb(dec_state_t *ps_dec) +{ + stream_t *ps_stream; + WORD16 *pi2_mv; + e_field_t e_fld; + mb_mc_params_t *ps_mc; + UWORD16 i; + + ps_stream = &ps_dec->s_bit_stream; + + for(i = 0; i < 2; i++) + { + /********************************************************************/ + /* Decode the first motion vector */ + /********************************************************************/ + pi2_mv = (WORD16 *)&ps_dec->ai2_mv[i][FIRST]; + e_fld = impeg2d_dec_mv(ps_stream,ps_dec->ai2_pred_mv[i][FIRST],pi2_mv, + ps_dec->au2_f_code[i],0, ps_dec->u2_fld_pic); + + ps_dec->ai2_pred_mv[i][SECOND][MV_X] = ps_dec->ai2_pred_mv[i][FIRST][MV_X]; + ps_dec->ai2_pred_mv[i][SECOND][MV_Y] = ps_dec->ai2_pred_mv[i][FIRST][MV_Y]; + /********************************************************************/ + /* Set the motion vector params */ + /********************************************************************/ + ps_mc = &ps_dec->as_mb_mc_params[i][FIRST]; + ps_mc->s_ref = ps_dec->as_ref_buf[i][e_fld]; + impeg2d_set_mc_params(&ps_mc->s_luma, &ps_mc->s_chroma, ps_dec->s_mb_type,i, + pi2_mv, ps_dec->u2_mb_x, ps_dec->u2_mb_y, ps_dec->u2_frame_width, ps_dec->u2_frame_height,ps_dec->u2_picture_width); + } + +} diff --git a/decoder/impeg2d_mv_dec.h b/decoder/impeg2d_mv_dec.h new file mode 100644 index 0000000..f6c691e --- /dev/null +++ b/decoder/impeg2d_mv_dec.h @@ -0,0 +1,28 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ /* Declarations of the motion vector decoding routines defined in impeg2d_mv_dec.c */ +#ifndef __IMPEG2D_MV_DEC_H__ +#define __IMPEG2D_MV_DEC_H__ + /* impeg2d_dec_mv: reads a field select bit when fld_sel is non-zero (TOP is used otherwise), decodes one motion vector into mv, updates predMv and returns the reference field */ +e_field_t impeg2d_dec_mv(stream_t *stream, WORD16 predMv[], WORD16 mv[],UWORD16 fCode[], + UWORD16 shift,UWORD16 fld_sel); /* impeg2d_dec_1mv: decodes both components of one motion vector into mv and updates predMv; shift is applied to the vertical component only; when dmv is non-NULL a dmvector value is read after each component */ +INLINE void impeg2d_dec_1mv(stream_t *stream, WORD16 predMv[], WORD16 mv[],UWORD16 fCode[], + UWORD16 shift,WORD16 dmv[]); + +#endif /* #ifndef __IMPEG2D_MV_DEC_H__ */ diff --git a/decoder/impeg2d_pic_proc.c b/decoder/impeg2d_pic_proc.c new file mode 100755 index 0000000..3dececb --- /dev/null +++ b/decoder/impeg2d_pic_proc.c @@ -0,0 +1,664 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include <stdio.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_globals.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" + +void impeg2d_init_function_ptr(void *pv_codec); +void impeg2d_format_convert(dec_state_t *ps_dec, + pic_buf_t *ps_src_pic, + iv_yuv_buf_t *ps_disp_frm_buf, + UWORD32 u4_start_row, UWORD32 u4_num_rows) +{ + UWORD8 *pu1_src_y,*pu1_src_u,*pu1_src_v; + UWORD8 *pu1_dst_y,*pu1_dst_u,*pu1_dst_v; + + + + if((NULL == ps_src_pic) || (NULL == ps_src_pic->pu1_y) || (0 == u4_num_rows)) + return; + + pu1_src_y = ps_src_pic->pu1_y + (u4_start_row * ps_dec->u2_frame_width); + pu1_src_u = ps_src_pic->pu1_u + ((u4_start_row >> 1) * (ps_dec->u2_frame_width >> 1)); + pu1_src_v = ps_src_pic->pu1_v + ((u4_start_row >> 1) *(ps_dec->u2_frame_width >> 1)); + + pu1_dst_y = (UWORD8 *)ps_disp_frm_buf->pv_y_buf + (u4_start_row * ps_dec->u4_frm_buf_stride); + pu1_dst_u = (UWORD8 
*)ps_disp_frm_buf->pv_u_buf +((u4_start_row >> 1)*(ps_dec->u4_frm_buf_stride >> 1)); + pu1_dst_v = (UWORD8 *)ps_disp_frm_buf->pv_v_buf +((u4_start_row >> 1)*(ps_dec->u4_frm_buf_stride >> 1)); + + if (IV_YUV_420P == ps_dec->i4_chromaFormat) + { + ps_dec->pf_copy_yuv420p_buf(pu1_src_y, pu1_src_u, pu1_src_v, pu1_dst_y, + pu1_dst_u, pu1_dst_v, + ps_dec->u2_frame_width, + u4_num_rows, + ps_dec->u4_frm_buf_stride, + (ps_dec->u4_frm_buf_stride >> 1), + (ps_dec->u4_frm_buf_stride >> 1), + ps_dec->u2_frame_width, + (ps_dec->u2_frame_width >> 1), + (ps_dec->u2_frame_width >> 1)); + } + else if (IV_YUV_422ILE == ps_dec->i4_chromaFormat) + { + void *pv_yuv422i; + UWORD32 u2_height,u2_width,u2_stride_y,u2_stride_u,u2_stride_v; + UWORD32 u2_stride_yuv422i; + + + pv_yuv422i = (UWORD8 *)ps_disp_frm_buf->pv_y_buf + ((ps_dec->u2_vertical_size)*(ps_dec->u4_frm_buf_stride)); + u2_height = u4_num_rows; + u2_width = ps_dec->u2_horizontal_size; + u2_stride_y = ps_dec->u2_frame_width; + u2_stride_u = u2_stride_y >> 1; + u2_stride_v = u2_stride_u; + u2_stride_yuv422i = (0 == ps_dec->u4_frm_buf_stride) ? 
ps_dec->u2_horizontal_size : ps_dec->u4_frm_buf_stride; + + ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile(pu1_src_y, + pu1_src_u, + pu1_src_v, + pv_yuv422i, + u2_width, + u2_height, + u2_stride_y, + u2_stride_u, + u2_stride_v, + u2_stride_yuv422i); + + } + else if((ps_dec->i4_chromaFormat == IV_YUV_420SP_UV) || + (ps_dec->i4_chromaFormat == IV_YUV_420SP_VU)) + { + + UWORD32 dest_inc_Y=0,dest_inc_UV=0; + WORD32 convert_uv_only; + + pu1_dst_u = (UWORD8 *)ps_disp_frm_buf->pv_u_buf +((u4_start_row >> 1)*(ps_dec->u4_frm_buf_stride)); + dest_inc_Y = ps_dec->u4_frm_buf_stride; + dest_inc_UV = ((ps_dec->u4_frm_buf_stride + 1) >> 1) << 1; + convert_uv_only = 0; + if(1 == ps_dec->u4_share_disp_buf) + convert_uv_only = 1; + + if(ps_dec->i4_chromaFormat == IV_YUV_420SP_UV) + { + ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv(pu1_src_y, + pu1_src_u, + pu1_src_v, + pu1_dst_y, + pu1_dst_u, + u4_num_rows, + ps_dec->u2_horizontal_size, + ps_dec->u2_frame_width, + ps_dec->u2_frame_width >> 1, + ps_dec->u2_frame_width >> 1, + dest_inc_Y, + dest_inc_UV, + convert_uv_only); + } + else + { + ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu(pu1_src_y, + pu1_src_u, + pu1_src_v, + pu1_dst_y, + pu1_dst_u, + u4_num_rows, + ps_dec->u2_horizontal_size, + ps_dec->u2_frame_width, + ps_dec->u2_frame_width >> 1, + ps_dec->u2_frame_width >> 1, + dest_inc_Y, + dest_inc_UV, + convert_uv_only); + } + + + + } + +} + + +/******************************************************************************* +* +* Function Name : impeg2d_get_frm_buf +* +* Description : Gets YUV component buffers for the frame +* +* Arguments : +* frm_buf : YUV buffer +* frm : Reference frame +* width : Width of the frame +* Height : Height of the frame +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_get_frm_buf(yuv_buf_t *ps_frm_buf,UWORD8 *pu1_frm,UWORD32 u4_width,UWORD32 u4_height) +{ + UWORD32 u4_luma_size = u4_width * u4_height; + UWORD32 u4_chroma_size = 
(u4_width * u4_height)>>2; + + ps_frm_buf->pu1_y = pu1_frm; + ps_frm_buf->pu1_u = pu1_frm + u4_luma_size; + ps_frm_buf->pu1_v = pu1_frm + u4_luma_size + u4_chroma_size; + +} +/******************************************************************************* +* +* Function Name : impeg2d_get_bottom_field_buf +* +* Description : Gets YUV component buffers for bottom field of the frame +* +* Arguments : +* frm_buf : YUV buffer +* frm : Reference frame +* width : Width of the frame +* Height : Height of the frame +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_get_bottom_field_buf(yuv_buf_t *ps_src_buf,yuv_buf_t *ps_dst_buf, + UWORD32 u4_width) +{ + ps_dst_buf->pu1_y = ps_src_buf->pu1_y + u4_width; + ps_dst_buf->pu1_u = ps_src_buf->pu1_u + (u4_width>>1); + ps_dst_buf->pu1_v = ps_src_buf->pu1_v + (u4_width>>1); + +} +/******************************************************************************* +* Function Name : impeg2d_get_mb_addr_incr +* +* Description : Decodes the Macroblock address increment +* +* Arguments : +* stream : Bitstream +* +* Values Returned : Macroblock address increment +*******************************************************************************/ +UWORD16 impeg2d_get_mb_addr_incr(stream_t *ps_stream) +{ + UWORD16 u2_mb_addr_incr = 0; + while (impeg2d_bit_stream_nxt(ps_stream,MB_ESCAPE_CODE_LEN) == MB_ESCAPE_CODE) + { + impeg2d_bit_stream_flush(ps_stream,MB_ESCAPE_CODE_LEN); + u2_mb_addr_incr += 33; + } + u2_mb_addr_incr += impeg2d_dec_vld_symbol(ps_stream,gai2_impeg2d_mb_addr_incr,MB_ADDR_INCR_LEN) + + MB_ADDR_INCR_OFFSET; + return(u2_mb_addr_incr); +} + +/******************************************************************************* +* +* Function Name : impeg2d_init_video_state +* +* Description : Initializes the Video decoder state +* +* Arguments : +* dec : Decoder context +* videoType : MPEG_2_Video / MPEG_1_Video +* +* Values Returned : None 
+*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_init_video_state(dec_state_t *ps_dec, e_video_type_t e_video_type) +{ + /*-----------------------------------------------------------------------*/ + /* Bit Stream that conforms to MPEG-1 <ISO/IEC 11172-2> standard */ + /*-----------------------------------------------------------------------*/ + if(e_video_type == MPEG_1_VIDEO) + { + ps_dec->u2_is_mpeg2 = 0; + + /*-------------------------------------------------------------------*/ + /* force MPEG-1 parameters for proper decoder behavior */ + /* see ISO/IEC 13818-2 section D.9.14 */ + /*-------------------------------------------------------------------*/ + ps_dec->u2_progressive_sequence = 1; + ps_dec->u2_intra_dc_precision = 0; + ps_dec->u2_picture_structure = FRAME_PICTURE; + ps_dec->u2_frame_pred_frame_dct = 1; + ps_dec->u2_concealment_motion_vectors = 0; + ps_dec->u2_q_scale_type = 0; + ps_dec->u2_intra_vlc_format = 0; + ps_dec->u2_alternate_scan = 0; + ps_dec->u2_repeat_first_field = 0; + ps_dec->u2_progressive_frame = 1; + ps_dec->u2_frame_rate_extension_n = 0; + ps_dec->u2_frame_rate_extension_d = 0; + + ps_dec->pf_vld_inv_quant = impeg2d_vld_inv_quant_mpeg1; + /*-------------------------------------------------------------------*/ + /* Setting of parameters other than those mentioned in MPEG2 standard*/ + /* but used in decoding process. 
*/ + /*-------------------------------------------------------------------*/ + } + /*-----------------------------------------------------------------------*/ + /* Bit Stream that conforms to MPEG-2 */ + /*-----------------------------------------------------------------------*/ + else + { + ps_dec->u2_is_mpeg2 = 1; + ps_dec->u2_full_pel_forw_vector = 0; + ps_dec->u2_forw_f_code = 7; + ps_dec->u2_full_pel_back_vector = 0; + ps_dec->u2_back_f_code = 7; + ps_dec->pf_vld_inv_quant = impeg2d_vld_inv_quant_mpeg2; + + + } + + + impeg2d_init_function_ptr(ps_dec); + + /* Set the frame Width and frame Height */ + ps_dec->u2_frame_height = ALIGN16(ps_dec->u2_vertical_size); + ps_dec->u2_frame_width = ALIGN16(ps_dec->u2_horizontal_size); + ps_dec->u2_num_horiz_mb = (ps_dec->u2_horizontal_size + 15) >> 4; + // dec->u4_frm_buf_stride = dec->frameWidth; + if (ps_dec->u2_frame_height > ps_dec->u2_create_max_height || ps_dec->u2_frame_width > ps_dec->u2_create_max_width) + { + return IMPEG2D_PIC_SIZE_NOT_SUPPORTED; + } + + ps_dec->u2_num_flds_decoded = 0; + + /* Calculate the frame period */ + { + UWORD32 numer; + UWORD32 denom; + numer = (UWORD32)gau2_impeg2_frm_rate_code[ps_dec->u2_frame_rate_code][1] * + (UWORD32)(ps_dec->u2_frame_rate_extension_d + 1); + + denom = (UWORD32)gau2_impeg2_frm_rate_code[ps_dec->u2_frame_rate_code][0] * + (UWORD32)(ps_dec->u2_frame_rate_extension_n + 1); + ps_dec->u2_framePeriod = (numer * 1000 * 100) / denom; + } + + + if(VERTICAL_SCAN == ps_dec->u2_alternate_scan) + { + ps_dec->pu1_inv_scan_matrix = (UWORD8 *)gau1_impeg2_inv_scan_vertical; + } + else + { + ps_dec->pu1_inv_scan_matrix = (UWORD8 *)gau1_impeg2_inv_scan_zig_zag; + } + return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; +} +/******************************************************************************* +* +* Function Name : impeg2d_pre_pic_dec_proc +* +* Description : Does the processing neccessary before picture decoding +* +* Arguments : +* dec : Decoder context +* +* Values Returned : 
None +*******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *ps_dec) +{ + WORD32 u4_get_disp; + pic_buf_t *ps_disp_pic; + IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; + + u4_get_disp = 0; + ps_disp_pic = NULL; + + /* Field Picture */ + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + { + ps_dec->u2_num_vert_mb = (ps_dec->u2_vertical_size + 31) >> 5; + + if(ps_dec->u2_num_flds_decoded == 0) + { + pic_buf_t *ps_pic_buf; + u4_get_disp = 1; + + ps_pic_buf = impeg2_buf_mgr_get_next_free(ps_dec->pv_pic_buf_mg, &ps_dec->i4_cur_buf_id); + + if (NULL == ps_pic_buf) + { + return IMPEG2D_NO_FREE_BUF_ERR; + } + + impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_DISP); + impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF); + + ps_pic_buf->u4_ts = ps_dec->u4_inp_ts; + ps_dec->ps_cur_pic = ps_pic_buf; + ps_dec->s_cur_frm_buf.pu1_y = ps_pic_buf->pu1_y; + ps_dec->s_cur_frm_buf.pu1_u = ps_pic_buf->pu1_u; + ps_dec->s_cur_frm_buf.pu1_v = ps_pic_buf->pu1_v; + } + + if(ps_dec->u2_picture_structure == TOP_FIELD) + { + ps_dec->u2_fld_parity = TOP; + } + else + { + ps_dec->u2_fld_parity = BOTTOM; + } + ps_dec->u2_field_dct = 0; + ps_dec->u2_read_dct_type = 0; + ps_dec->u2_read_motion_type = 1; + ps_dec->u2_fld_pic = 1; + ps_dec->u2_frm_pic = 0; + ps_dec->ps_func_forw_or_back = gas_impeg2d_func_fld_fw_or_bk; + ps_dec->ps_func_bi_direct = gas_impeg2d_func_fld_bi_direct; + } + /* Frame Picture */ + else + { + pic_buf_t *ps_pic_buf; + + + ps_dec->u2_num_vert_mb = (ps_dec->u2_vertical_size + 15) >> 4; + u4_get_disp = 1; + ps_pic_buf = impeg2_buf_mgr_get_next_free(ps_dec->pv_pic_buf_mg, &ps_dec->i4_cur_buf_id); + + if (NULL == ps_pic_buf) + { + return IMPEG2D_NO_FREE_BUF_ERR; + } + impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_DISP); + 
impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF); + + ps_pic_buf->u4_ts = ps_dec->u4_inp_ts; + ps_dec->ps_cur_pic = ps_pic_buf; + ps_dec->s_cur_frm_buf.pu1_y = ps_pic_buf->pu1_y; + ps_dec->s_cur_frm_buf.pu1_u = ps_pic_buf->pu1_u; + ps_dec->s_cur_frm_buf.pu1_v = ps_pic_buf->pu1_v; + + + if(ps_dec->u2_frame_pred_frame_dct == 0) + { + ps_dec->u2_read_dct_type = 1; + ps_dec->u2_read_motion_type = 1; + } + else + { + ps_dec->u2_read_dct_type = 0; + ps_dec->u2_read_motion_type = 0; + ps_dec->u2_motion_type = 2; + ps_dec->u2_field_dct = 0; + } + + ps_dec->u2_fld_parity = TOP; + ps_dec->u2_fld_pic = 0; + ps_dec->u2_frm_pic = 1; + ps_dec->ps_func_forw_or_back = gas_impeg2d_func_frm_fw_or_bk; + ps_dec->ps_func_bi_direct = gas_impeg2d_func_frm_bi_direct; + } + ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision; + ps_dec->u2_num_mbs_left = ps_dec->u2_num_horiz_mb * ps_dec->u2_num_vert_mb; + if(u4_get_disp) + { + if(ps_dec->u4_num_frames_decoded > 1) + { + ps_disp_pic = impeg2_disp_mgr_get(&ps_dec->s_disp_mgr, &ps_dec->i4_disp_buf_id); + } + ps_dec->ps_disp_pic = ps_disp_pic; + if(ps_disp_pic) + { + if(1 == ps_dec->u4_share_disp_buf) + { + ps_dec->ps_disp_frm_buf->pv_y_buf = ps_disp_pic->pu1_y; + if(IV_YUV_420P == ps_dec->i4_chromaFormat) + { + ps_dec->ps_disp_frm_buf->pv_u_buf = ps_disp_pic->pu1_u; + ps_dec->ps_disp_frm_buf->pv_v_buf = ps_disp_pic->pu1_v; + } + else + { + UWORD8 *pu1_buf; + + pu1_buf = ps_dec->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[1]; + ps_dec->ps_disp_frm_buf->pv_u_buf = pu1_buf; + + pu1_buf = ps_dec->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[2]; + ps_dec->ps_disp_frm_buf->pv_v_buf = pu1_buf; + } + } + } + } + + + switch(ps_dec->e_pic_type) + { + case I_PIC: + { + ps_dec->pf_decode_slice = impeg2d_dec_i_slice; + break; + } + 
case D_PIC: + { + ps_dec->pf_decode_slice = impeg2d_dec_d_slice; + break; + } + case P_PIC: + { + ps_dec->pf_decode_slice = impeg2d_dec_p_b_slice; + ps_dec->pu2_mb_type = gau2_impeg2d_p_mb_type; + break; + } + case B_PIC: + { + ps_dec->pf_decode_slice = impeg2d_dec_p_b_slice; + ps_dec->pu2_mb_type = gau2_impeg2d_b_mb_type; + break; + } + default: + return IMPEG2D_INVALID_PIC_TYPE; + } + + /*************************************************************************/ + /* Set the reference pictures */ + /*************************************************************************/ + + /* Error resilience: If forward and backward pictures are going to be NULL*/ + /* then assign both to the current */ + /* if one of them NULL then we will assign the non null to the NULL one */ + + if(ps_dec->e_pic_type == P_PIC) + { + if (NULL == ps_dec->as_recent_fld[1][0].pu1_y) + { + ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf; + } + if (NULL == ps_dec->as_recent_fld[1][1].pu1_y) + { + impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1], + ps_dec->u2_frame_width); + } + + ps_dec->as_ref_buf[FORW][TOP] = ps_dec->as_recent_fld[1][0]; + ps_dec->as_ref_buf[FORW][BOTTOM] = ps_dec->as_recent_fld[1][1]; + + + } + else if(ps_dec->e_pic_type == B_PIC) + { + if((NULL == ps_dec->as_recent_fld[1][0].pu1_y) && (NULL == ps_dec->as_recent_fld[0][0].pu1_y)) + { + // assign the current picture to both + ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf; + impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1], + ps_dec->u2_frame_width); + ps_dec->as_recent_fld[0][0] = ps_dec->s_cur_frm_buf; + ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1]; + } + //Assign the non-null picture to the null picture + else if ((NULL != ps_dec->as_recent_fld[1][0].pu1_y) && (NULL == ps_dec->as_recent_fld[0][0].pu1_y)) + { + ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0]; + ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1]; + } + 
else if ((NULL == ps_dec->as_recent_fld[1][0].pu1_y) && (NULL != ps_dec->as_recent_fld[0][0].pu1_y)) + { + ps_dec->as_recent_fld[1][0] = ps_dec->as_recent_fld[0][0]; + ps_dec->as_recent_fld[1][1] = ps_dec->as_recent_fld[0][1]; + } + + ps_dec->as_ref_buf[FORW][TOP] = ps_dec->as_recent_fld[0][0]; + ps_dec->as_ref_buf[FORW][BOTTOM] = ps_dec->as_recent_fld[0][1]; + ps_dec->as_ref_buf[BACK][TOP] = ps_dec->as_recent_fld[1][0]; + ps_dec->as_ref_buf[BACK][BOTTOM] = ps_dec->as_recent_fld[1][1]; + + + } + + return e_error; +} + +/******************************************************************************* +* +* Function Name : impeg2d_post_pic_dec_proc +* +* Description : Performs processing that is needed at the end of picture +* decode +* +* Arguments : +* dec : Decoder context +* +* Values Returned : None +*******************************************************************************/ +void impeg2d_post_pic_dec_proc(dec_state_t *ps_dec) +{ + + WORD32 u4_update_pic_buf = 0; + /*************************************************************************/ + /* Processing at the end of picture */ + /*************************************************************************/ + if(ps_dec->u2_picture_structure != FRAME_PICTURE) + { + ps_dec->u2_num_vert_mb = (ps_dec->u2_vertical_size + 31) >> 5; + + if(ps_dec->u2_num_flds_decoded == 1) + { + ps_dec->u2_num_flds_decoded = 0; + u4_update_pic_buf = 1; + } + else + { + ps_dec->u2_num_flds_decoded = 1; + } + } + else + { + u4_update_pic_buf = 1; + } + + if(u4_update_pic_buf) + { + ps_dec->i4_frame_decoded = 1; + if(ps_dec->e_pic_type != B_PIC) + { + /* In any sequence first two pictures have to be reference pictures */ + /* Adding of first picture in the sequence */ + if(ps_dec->aps_ref_pics[0] == NULL) + { + ps_dec->aps_ref_pics[0] = ps_dec->ps_cur_pic; + } + + /* Adding of second picture in the sequence */ + else if(ps_dec->aps_ref_pics[1] == NULL) + { + ps_dec->aps_ref_pics[1] = ps_dec->ps_cur_pic; + 
impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->aps_ref_pics[0], ps_dec->aps_ref_pics[0]->i4_buf_id); + } + else + { + + impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->aps_ref_pics[1], ps_dec->aps_ref_pics[1]->i4_buf_id); + impeg2_buf_mgr_release(ps_dec->pv_pic_buf_mg, ps_dec->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF); + ps_dec->aps_ref_pics[0] = ps_dec->aps_ref_pics[1]; + ps_dec->aps_ref_pics[1] = ps_dec->ps_cur_pic; + + } + } + else + { + impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->ps_cur_pic, ps_dec->ps_cur_pic->i4_buf_id); + + impeg2_buf_mgr_release(ps_dec->pv_pic_buf_mg, ps_dec->ps_cur_pic->i4_buf_id, BUF_MGR_REF); + } + + } + /*************************************************************************/ + /* Update the list of recent reference pictures */ + /*************************************************************************/ + if(ps_dec->e_pic_type != B_PIC) + { + switch(ps_dec->u2_picture_structure) + { + case FRAME_PICTURE: + { + ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0]; + ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1]; + + ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf; + impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1], + ps_dec->u2_frame_width); + break; + } + case TOP_FIELD: + { + ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0]; + ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf; + break; + } + case BOTTOM_FIELD: + { + ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1]; + impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1], + ps_dec->u2_frame_width); + break; + } + } + } +} diff --git a/decoder/impeg2d_pic_proc.h b/decoder/impeg2d_pic_proc.h new file mode 100644 index 0000000..e97dd13 --- /dev/null +++ b/decoder/impeg2d_pic_proc.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, 
Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef __IMPEG2D_PIC_PROC_H__
#define __IMPEG2D_PIC_PROC_H__

/*****************************************************************************/
/* Function Declarations                                                     */
/* Picture-level entry points of the decoder. The header relies on the       */
/* includer to have declared stream_t, dec_state_t, pic_buf_t, iv_yuv_buf_t, */
/* e_video_type_t and IMPEG2D_ERROR_CODES_T beforehand.                      */
/*****************************************************************************/

/* Callers in impeg2d_pnb_pic.c use the return value as the macroblock       */
/* address increment read from the given stream.                             */
UWORD16 impeg2d_get_mb_addr_incr(stream_t *stream);

/* NOTE(review): implementation not visible here; presumably initialises the */
/* decoder state for the given video type - confirm against the definition.  */
IMPEG2D_ERROR_CODES_T impeg2d_init_video_state(dec_state_t *dec, e_video_type_t videoType);

/* Per-picture setup before slice decoding: among other things selects the   */
/* slice decoder for the picture type and sets up the reference buffers.     */
/* Returns IMPEG2D_INVALID_PIC_TYPE for an unknown picture type.             */
IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *dec);

/* Per-picture bookkeeping after decoding: display queueing, reference       */
/* release and update of the recent reference field list.                    */
void impeg2d_post_pic_dec_proc(dec_state_t *dec);

/* Slice decoders; impeg2d_pre_pic_dec_proc installs one of them as          */
/* pf_decode_slice (I, D, and P/B pictures respectively).                    */
IMPEG2D_ERROR_CODES_T impeg2d_dec_i_slice(dec_state_t *dec);
IMPEG2D_ERROR_CODES_T impeg2d_dec_d_slice(dec_state_t *dec);
IMPEG2D_ERROR_CODES_T impeg2d_dec_p_b_slice(dec_state_t *dec);

/* NOTE(review): implementation not visible here; presumably converts        */
/* u4_num_rows rows of ps_src_pic, starting at u4_start_row, into the        */
/* display buffer - confirm against the definition.                          */
void impeg2d_format_convert(dec_state_t *ps_dec,
                            pic_buf_t *ps_src_pic,
                            iv_yuv_buf_t *ps_disp_frm_buf,
                            UWORD32 u4_start_row, UWORD32 u4_num_rows);


#endif /* __IMPEG2D_PIC_PROC_H__ */

diff --git a/decoder/impeg2d_pnb_pic.c b/decoder/impeg2d_pnb_pic.c
new file mode 100644
index 0000000..036c7d1
--- /dev/null
+++ b/decoder/impeg2d_pnb_pic.c
@@ -0,0 +1,698 @@
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may
not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <stdio.h>
#include <string.h>

#include "iv_datatypedef.h"
#include "iv.h"

#include "impeg2_buf_mgr.h"
#include "impeg2_disp_mgr.h"
#include "impeg2_defs.h"
#include "impeg2_platform_macros.h"
#include "impeg2_inter_pred.h"
#include "impeg2_idct.h"
#include "impeg2_globals.h"
#include "impeg2_mem_func.h"
#include "impeg2_format_conv.h"
#include "impeg2_macros.h"

#include "ivd.h"
#include "impeg2d.h"
#include "impeg2d_bitstream.h"
#include "impeg2d_structs.h"
#include "impeg2d_vld_tables.h"
#include "impeg2d_vld.h"
#include "impeg2d_pic_proc.h"
#include "impeg2d_debug.h"
#include "impeg2d_mc.h"

#define BLK_SIZE 8
#define LUMA_BLK_SIZE (2 * (BLK_SIZE))
#define CHROMA_BLK_SIZE (BLK_SIZE)


/*******************************************************************************
*
*  Function Name   : impeg2d_dec_p_mb_params
*
*  Description     : Parses the macroblock-level parameters of one macroblock
*                    for the non-B path of impeg2d_dec_p_b_slice: address
*                    increment (skipped MBs), MB type, motion type, DCT type,
*                    quantiser scale code, motion data (through the
*                    per-motion-type function table) and the coded block
*                    pattern. Results are left in the decoder context
*                    (u2_motion_type, u2_field_dct, u1_quant_scale,
*                    u2_coded_mb, e_mb_pred, s_mb_type, u2_prev_intra_mb,
*                    u2_cbp, u2_def_dc_pred).
*
*  Arguments       :
*  dec             : Decoder context
*
*  Values Returned : None
*******************************************************************************/
void impeg2d_dec_p_mb_params(dec_state_t *ps_dec)
{
    stream_t *ps_stream = &ps_dec->s_bit_stream;
    UWORD16 u2_mb_addr_incr;
    UWORD16 u2_total_len;
    UWORD16 u2_len;
    UWORD16 u2_mb_type;
    UWORD32 u4_next_word;
    const dec_mb_params_t *ps_dec_mb_params;
    /* A leading '1' bit is consumed on its own and no MBs are skipped       */
    /* (presumably the one-bit code for an address increment of 1).          */
    if(impeg2d_bit_stream_nxt(ps_stream,1) == 1)
    {
        impeg2d_bit_stream_flush(ps_stream,1);

    }
    else
    {
        u2_mb_addr_incr = impeg2d_get_mb_addr_incr(ps_stream);
        /* NOTE(review): for the first MB of a slice the increment is read   */
        /* but then ignored here, whereas impeg2d_dec_pnb_mb_params uses it  */
        /* to position u2_mb_x - confirm this asymmetry is intended.         */
        if(0 == ps_dec->u2_first_mb)
        {
            /****************************************************************/
            /* If the 2nd member of a field picture pair is a P picture and */
            /* the first one was an I picture, there cannot be any skipped  */
            /* MBs in the second field picture                              */
            /****************************************************************/
            /*
            if((dec->picture_structure != FRAME_PICTURE) &&
               (dec->f->FieldFuncCall != 0) &&
               (dec->las->u1_last_coded_vop_type == I))
            {
                core0_err_handler((void *)(VOLParams),
                                  ITTMPEG2_ERR_INVALID_MB_SKIP);
            }
            */
            /****************************************************************/
            /* In MPEG-2, the last MB of the row cannot be skipped and the  */
            /* MBAddrIncr cannot be such that it will take the current MB   */
            /* beyond the current row                                       */
            /* In MPEG-1, the slice could start and end anywhere and is not */
            /* restricted to a row like in MPEG-2. Hence this check should  */
            /* not be done for MPEG-1 streams.                              */
            /****************************************************************/
            if(ps_dec->u2_is_mpeg2 && ((ps_dec->u2_mb_x + u2_mb_addr_incr) > ps_dec->u2_num_horiz_mb) )
            {
                u2_mb_addr_incr = ps_dec->u2_num_horiz_mb - ps_dec->u2_mb_x;
            }

            /* An increment of N means N - 1 macroblocks were skipped */
            impeg2d_dec_skip_mbs(ps_dec, (UWORD16)(u2_mb_addr_incr - 1));
        }

    }
    /* Peek 16 bits once; the fields below are taken from the top of this    */
    /* word, which is shifted left after each field. The bits are flushed    */
    /* from the stream in one go after the quant scale code.                 */
    u4_next_word = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,16);
    /*-----------------------------------------------------------------------*/
    /* MB type                                                               */
    /*-----------------------------------------------------------------------*/
    {
        /* Table lookup on the top 6 bits; bits 15..8 of the entry give the  */
        /* code length, the remaining bits are tested as MB_* flags below    */
        u2_mb_type = ps_dec->pu2_mb_type[BITS((UWORD16)u4_next_word,15,10)];
        u2_len = BITS(u2_mb_type,15,8);
        u2_total_len = u2_len;
        u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << u2_len);
    }
    /*-----------------------------------------------------------------------*/
    /* motion type                                                           */
    /*-----------------------------------------------------------------------*/
    {
        if((u2_mb_type & MB_FORW_OR_BACK) && ps_dec->u2_read_motion_type)
        {
            WORD32 i4_motion_type;
            ps_dec->u2_motion_type = BITS((UWORD16)u4_next_word,15,14);
            u2_total_len += MB_MOTION_TYPE_LEN;
            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_MOTION_TYPE_LEN);
            i4_motion_type = ps_dec->u2_motion_type;

            /* NOTE(review): the sanitised value is stored only in the local */
            /* i4_motion_type, which is never read again; the table lookup   */
            /* below still indexes with ps_dec->u2_motion_type, so this      */
            /* validity check currently has no effect.                       */
            if((i4_motion_type == 0) ||
                (i4_motion_type == 4) ||
                (i4_motion_type > 7))
            {
                //TODO : VANG Check for validity
                i4_motion_type = 1;
            }

        }
    }
    /*-----------------------------------------------------------------------*/
    /* dct type                                                              */
    /*-----------------------------------------------------------------------*/
    {
        if((u2_mb_type & MB_CODED) && ps_dec->u2_read_dct_type)
        {
            ps_dec->u2_field_dct = BIT((UWORD16)u4_next_word,15);
            u2_total_len += MB_DCT_TYPE_LEN;
            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_DCT_TYPE_LEN);
        }
    }
    /*-----------------------------------------------------------------------*/
    /* Quant scale code                                                      */
    /*-----------------------------------------------------------------------*/
    if(u2_mb_type & MB_QUANT)
    {
        UWORD16 u2_quant_scale_code;
        u2_quant_scale_code = BITS((UWORD16)u4_next_word,15,11);

        /* Non-linear scale comes from a table, linear scale is 2 * code */
        ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ?
            gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1);
        u2_total_len += MB_QUANT_SCALE_CODE_LEN;
    }
    impeg2d_bit_stream_flush(ps_stream,u2_total_len);
    /*-----------------------------------------------------------------------*/
    /* Set the function pointers                                             */
    /*-----------------------------------------------------------------------*/
    ps_dec->u2_coded_mb = (UWORD16)(u2_mb_type & MB_CODED);

    if(u2_mb_type & MB_FORW_OR_BACK)
    {

        /* refPic is 0 when the forward-MV flag is set and 1 otherwise; it   */
        /* is stored as the prediction direction                             */
        UWORD16 refPic = !(u2_mb_type & MB_MV_FORW);
        UWORD16 index = (ps_dec->u2_motion_type);
        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->e_mb_pred = (e_pred_direction_t)refPic;
        ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[index];
        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
        ps_dec_mb_params->pf_func_mb_params(ps_dec);

    }
    else if(u2_mb_type & MB_TYPE_INTRA)
    {
        ps_dec->u2_prev_intra_mb = 1;
        impeg2d_dec_intra_mb(ps_dec);

    }
    else
    {
        /* Neither motion-compensated nor intra: handled as a forward MB     */
        /* with motion type 0                                                */
        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->e_mb_pred = FORW;
        ps_dec->u2_motion_type = 0;
        impeg2d_dec_0mv_coded_mb(ps_dec);
    }

    /*-----------------------------------------------------------------------*/
    /* decode cbp                                                            */
    /*-----------------------------------------------------------------------*/
    if((u2_mb_type & MB_TYPE_INTRA))
    {
        /* Intra MBs have all six blocks coded */
        ps_dec->u2_cbp = 0x3f;
        ps_dec->u2_prev_intra_mb = 1;
    }
    else
    {
        /* A non-intra MB resets the DC predictors to their default value */
        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision;
        ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
        ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
        if((ps_dec->u2_coded_mb))
        {
            /* Table entry: low byte = cbp value, high byte = code length */
            UWORD16 cbpValue;
            cbpValue = gau2_impeg2d_cbp_code[impeg2d_bit_stream_nxt(ps_stream,MB_CBP_LEN)];
            ps_dec->u2_cbp = cbpValue & 0xFF;
            impeg2d_bit_stream_flush(ps_stream,(cbpValue >> 8) & 0x0FF);
        }
        else
        {
            ps_dec->u2_cbp = 0;
        }
    }
}


/*******************************************************************************
*
*  Function Name   : impeg2d_dec_pnb_mb_params
*
*  Description     : Parses the macroblock-level parameters of one macroblock
*                    for the B-picture path of impeg2d_dec_p_b_slice. Same
*                    sequence as impeg2d_dec_p_mb_params, with two additions:
*                    the first MB of a slice positions u2_mb_x from the
*                    address increment, and bidirectional MBs are dispatched
*                    through ps_func_bi_direct.
*
*  Arguments       :
*  dec             : Decoder context
*
*  Values Returned : None
*******************************************************************************/
void impeg2d_dec_pnb_mb_params(dec_state_t *ps_dec)
{
    stream_t *ps_stream = &ps_dec->s_bit_stream;
    UWORD16 u2_mb_addr_incr;
    UWORD16 u2_total_len;
    UWORD16 u2_len;
    UWORD16 u2_mb_type;
    UWORD32 u4_next_word;
    const dec_mb_params_t *ps_dec_mb_params;
    /* A leading '1' bit is consumed on its own and no MBs are skipped       */
    /* (presumably the one-bit code for an address increment of 1).          */
    if(impeg2d_bit_stream_nxt(ps_stream,1) == 1)
    {
        impeg2d_bit_stream_flush(ps_stream,1);

    }
    else
    {
        u2_mb_addr_incr = impeg2d_get_mb_addr_incr(ps_stream);

        if(ps_dec->u2_first_mb)
        {
            /****************************************************************/
            /* Section 6.3.17                                               */
            /* The first MB of a slice cannot be skipped                    */
            /* But the mb_addr_incr can be > 1, because at the beginning of */
            /* a slice, it indicates the offset from the last MB in the     */
            /* previous row. Hence for the first slice in a row, the        */
            /* mb_addr_incr needs to be 1.                                  */
            /****************************************************************/
            /* MB_x is set to zero whenever MB_y changes. */
            ps_dec->u2_mb_x = u2_mb_addr_incr - 1;
            /* For error resilience */
            ps_dec->u2_mb_x = MIN(ps_dec->u2_mb_x, (ps_dec->u2_num_horiz_mb - 1));

            /****************************************************************/
            /* mb_addr_incr is forced to 1 because in this decoder it is    */
            /* used more as an indicator of the number of MBs skipped than  */
            /* as the value defined by the standard (Section 6.3.17)        */
            /****************************************************************/
            u2_mb_addr_incr = 1;
            ps_dec->u2_first_mb = 0;
        }
        else
        {
            /****************************************************************/
            /* In MPEG-2, the last MB of the row cannot be skipped and the  */
            /* mb_addr_incr cannot be such that it will take the current MB */
            /* beyond the current row                                       */
            /* In MPEG-1, the slice could start and end anywhere and is not */
            /* restricted to a row like in MPEG-2. Hence this check should  */
            /* not be done for MPEG-1 streams.                              */
            /****************************************************************/
            if(ps_dec->u2_is_mpeg2 &&
                ((ps_dec->u2_mb_x + u2_mb_addr_incr) > ps_dec->u2_num_horiz_mb))
            {
                u2_mb_addr_incr = ps_dec->u2_num_horiz_mb - ps_dec->u2_mb_x;
            }


            /* An increment of N means N - 1 macroblocks were skipped */
            impeg2d_dec_skip_mbs(ps_dec, (UWORD16)(u2_mb_addr_incr - 1));
        }

    }
    /* Peek 16 bits once; the fields below are taken from the top of this    */
    /* word, which is shifted left after each field. The bits are flushed    */
    /* from the stream in one go after the quant scale code.                 */
    u4_next_word = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,16);
    /*-----------------------------------------------------------------------*/
    /* MB type                                                               */
    /*-----------------------------------------------------------------------*/
    {
        /* Table lookup on the top 6 bits; bits 15..8 of the entry give the  */
        /* code length, the remaining bits are tested as MB_* flags below    */
        u2_mb_type = ps_dec->pu2_mb_type[BITS((UWORD16)u4_next_word,15,10)];
        u2_len = BITS(u2_mb_type,15,8);
        u2_total_len = u2_len;
        u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << u2_len);
    }
    /*-----------------------------------------------------------------------*/
    /* motion type                                                           */
    /*-----------------------------------------------------------------------*/
    {
        WORD32 i4_motion_type = ps_dec->u2_motion_type;

        if((u2_mb_type & MB_FORW_OR_BACK) && ps_dec->u2_read_motion_type)
        {
            ps_dec->u2_motion_type = BITS((UWORD16)u4_next_word,15,14);
            u2_total_len += MB_MOTION_TYPE_LEN;
            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_MOTION_TYPE_LEN);
            i4_motion_type = ps_dec->u2_motion_type;

        }


        /* NOTE(review): the sanitised value is stored only in the local     */
        /* i4_motion_type, which is never read again; the table lookups      */
        /* below still index with ps_dec->u2_motion_type, so this validity   */
        /* check currently has no effect.                                    */
        if ((u2_mb_type & MB_FORW_OR_BACK) &&
            ((i4_motion_type == 0) ||
            (i4_motion_type == 3) ||
            (i4_motion_type == 4) ||
            (i4_motion_type >= 7)))
        {
            //TODO: VANG Check for validity
            i4_motion_type = 1;
        }

    }
    /*-----------------------------------------------------------------------*/
    /* dct type                                                              */
    /*-----------------------------------------------------------------------*/
    {
        if((u2_mb_type & MB_CODED) && ps_dec->u2_read_dct_type)
        {
            ps_dec->u2_field_dct = BIT((UWORD16)u4_next_word,15);
            u2_total_len += MB_DCT_TYPE_LEN;
            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_DCT_TYPE_LEN);
        }
    }
    /*-----------------------------------------------------------------------*/
    /* Quant scale code                                                      */
    /*-----------------------------------------------------------------------*/
    if(u2_mb_type & MB_QUANT)
    {
        UWORD16 u2_quant_scale_code;
        u2_quant_scale_code = BITS((UWORD16)u4_next_word,15,11);

        /* Non-linear scale comes from a table, linear scale is 2 * code */
        ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ?
            gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1);
        u2_total_len += MB_QUANT_SCALE_CODE_LEN;
    }
    impeg2d_bit_stream_flush(ps_stream,u2_total_len);
    /*-----------------------------------------------------------------------*/
    /* Set the function pointers                                             */
    /*-----------------------------------------------------------------------*/
    ps_dec->u2_coded_mb = (UWORD16)(u2_mb_type & MB_CODED);

    if(u2_mb_type & MB_BIDRECT)
    {
        UWORD16 u2_index = (ps_dec->u2_motion_type);

        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->e_mb_pred = BIDIRECT;
        ps_dec_mb_params = &ps_dec->ps_func_bi_direct[u2_index];
        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
        ps_dec_mb_params->pf_func_mb_params(ps_dec);
    }
    else if(u2_mb_type & MB_FORW_OR_BACK)
    {

        /* u2_refPic is 0 when the forward-MV flag is set and 1 otherwise;   */
        /* it is stored as the prediction direction                          */
        UWORD16 u2_refPic = !(u2_mb_type & MB_MV_FORW);
        UWORD16 u2_index = (ps_dec->u2_motion_type);
        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->e_mb_pred = (e_pred_direction_t)u2_refPic;
        ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[u2_index];
        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
        ps_dec_mb_params->pf_func_mb_params(ps_dec);

    }
    else if(u2_mb_type & MB_TYPE_INTRA)
    {
        ps_dec->u2_prev_intra_mb = 1;
        impeg2d_dec_intra_mb(ps_dec);

    }
    else
    {
        /* Neither motion-compensated nor intra: handled as a forward MB     */
        /* with motion type 0                                                */
        ps_dec->u2_prev_intra_mb =0;
        ps_dec->e_mb_pred = FORW;
        ps_dec->u2_motion_type = 0;
        impeg2d_dec_0mv_coded_mb(ps_dec);
    }

    /*-----------------------------------------------------------------------*/
    /* decode cbp                                                            */
    /*-----------------------------------------------------------------------*/
    if((u2_mb_type & MB_TYPE_INTRA))
    {
        /* Intra MBs have all six blocks coded */
        ps_dec->u2_cbp = 0x3f;
        ps_dec->u2_prev_intra_mb = 1;
    }
    else
    {
        /* A non-intra MB resets the DC predictors to their default value */
        ps_dec->u2_prev_intra_mb = 0;
        ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision;
        ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
        ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
        if((ps_dec->u2_coded_mb))
        {
            /* Table entry: low byte = cbp value, high byte = code length */
            UWORD16 cbpValue;
            cbpValue = gau2_impeg2d_cbp_code[impeg2d_bit_stream_nxt(ps_stream,MB_CBP_LEN)];
            ps_dec->u2_cbp = cbpValue & 0xFF;
            impeg2d_bit_stream_flush(ps_stream,(cbpValue >> 8) & 0x0FF);
        }
        else
        {
            ps_dec->u2_cbp = 0;
        }
    }
}

/*******************************************************************************
*  Function Name   : impeg2d_dec_p_b_slice
*
*  Description     : Decodes the macroblocks of a P or B slice. For each MB
*                    it parses the MB parameters, runs motion compensation
*                    for non-intra MBs, then decodes, inverse-transforms and
*                    reconstructs every block flagged in the coded block
*                    pattern directly into the current frame buffer.
*
*  Arguments       :
*  dec             : Decoder state
*
*  Values Returned : IVD_ERROR_NONE (cast to IMPEG2D_ERROR_CODES_T) on
*                    success, the error reported by pf_vld_inv_quant, or
*                    IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR when the read offset
*                    passes the end of the bitstream buffer.
*******************************************************************************/
IMPEG2D_ERROR_CODES_T impeg2d_dec_p_b_slice(dec_state_t *ps_dec)
{
    WORD16 *pi2_vld_out;
    UWORD32 i;
    yuv_buf_t *ps_cur_frm_buf = &ps_dec->s_cur_frm_buf;

    UWORD32 u4_frm_offset = 0;
    const dec_mb_params_t *ps_dec_mb_params;
    IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;

    pi2_vld_out = ps_dec->ai2_vld_buf;
    /* Motion vector predictors are reset at the start of every slice */
    memset(ps_dec->ai2_pred_mv,0,sizeof(ps_dec->ai2_pred_mv));

    ps_dec->u2_prev_intra_mb = 0;
    ps_dec->u2_first_mb = 1;

    ps_dec->u2_picture_width = ps_dec->u2_frame_width;

    /* Field pictures are written into the interleaved frame buffer: the     */
    /* line stride doubles and the bottom field starts one frame line down   */
    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
    {
        ps_dec->u2_picture_width <<= 1;
        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
        {
            u4_frm_offset = ps_dec->u2_frame_width;
        }
    }

    do
    {
        UWORD32 u4_x_offset, u4_y_offset;



        UWORD32 u4_x_dst_offset = 0;
        UWORD32 u4_y_dst_offset = 0;
        UWORD8 *pu1_out_p;
        UWORD8 *pu1_pred;
        WORD32 u4_pred_strd;

        IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y);


        /* B pictures use the parser that also handles bidirectional MBs */
        if(ps_dec->e_pic_type == B_PIC)
            impeg2d_dec_pnb_mb_params(ps_dec);
        else
            impeg2d_dec_p_mb_params(ps_dec);

        IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y);

        /* Byte offsets of the MB's top-left luma sample in the frame buffer */
        u4_x_dst_offset = u4_frm_offset + (ps_dec->u2_mb_x << 4);
        u4_y_dst_offset = (ps_dec->u2_mb_y << 4) * ps_dec->u2_picture_width;
        pu1_out_p = ps_cur_frm_buf->pu1_y + u4_x_dst_offset + u4_y_dst_offset;
        if(ps_dec->u2_prev_intra_mb == 0)
        {
            UWORD32 offset_x, offset_y, stride;
            UWORD16 index = (ps_dec->u2_motion_type);
            /*only for non intra mb's*/
            if(ps_dec->e_mb_pred == BIDIRECT)
            {
                ps_dec_mb_params = &ps_dec->ps_func_bi_direct[index];
            }
            else
            {
                ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[index];
            }

            stride = ps_dec->u2_picture_width;

            offset_x = u4_frm_offset + (ps_dec->u2_mb_x << 4);

            offset_y = (ps_dec->u2_mb_y << 4);

            ps_dec->s_dest_buf.pu1_y = ps_cur_frm_buf->pu1_y + offset_y * stride + offset_x;

            /* Chroma planes use half the luma stride and half the offsets */
            stride = stride >> 1;

            ps_dec->s_dest_buf.pu1_u = ps_cur_frm_buf->pu1_u + (offset_y >> 1) * stride
                            + (offset_x >> 1);

            ps_dec->s_dest_buf.pu1_v = ps_cur_frm_buf->pu1_v + (offset_y >> 1) * stride
                            + (offset_x >> 1);

            /* Motion compensation writes the prediction into s_dest_buf,    */
            /* i.e. straight into the current frame                          */
            PROFILE_DISABLE_MC_IF0
            ps_dec_mb_params->pf_mc(ps_dec);

        }
        /* Luma blocks: block i is coded when cbp bit (BLOCKS_IN_MB - 1 - i) */
        /* is set                                                            */
        for(i = 0; i < NUM_LUMA_BLKS; ++i)
        {
            if((ps_dec->u2_cbp & (1 << (BLOCKS_IN_MB - 1 - i))) != 0)
            {
                e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
                              ps_dec->u2_prev_intra_mb, Y_LUMA, 0);
                if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
                {
                    return e_error;
                }

                u4_x_offset = gai2_impeg2_blk_x_off[i];

                if(ps_dec->u2_field_dct == 0)
                    u4_y_offset = gai2_impeg2_blk_y_off_frm[i] ;
                else
                    u4_y_offset = gai2_impeg2_blk_y_off_fld[i] ;





                IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);

                PROFILE_DISABLE_IDCT_IF0
                {
                    /* idx selects between two IDCT variants: 0 when the     */
                    /* combined non-zero row/column mask equals 1, else 1    */
                    WORD32 idx;
                    if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
                        idx = 0;
                    else
                        idx = 1;

                    if(0 == ps_dec->u2_prev_intra_mb)
                    {
                        /* Inter: the prediction is already in the output    */
                        /* buffer; field DCT doubles the line stride         */
                        pu1_pred = pu1_out_p + u4_y_offset * ps_dec->u2_picture_width + u4_x_offset;
                        u4_pred_strd = ps_dec->u2_picture_width << ps_dec->u2_field_dct;
                    }
                    else
                    {
                        /* Intra: gau1_impeg2_zerobuf is used as the         */
                        /* prediction, with a stride of 8                    */
                        pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
                        u4_pred_strd = 8;
                    }

                    ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
                                                            ps_dec->ai2_idct_stg1,
                                                            pu1_pred,
                                                            pu1_out_p + u4_y_offset * ps_dec->u2_picture_width + u4_x_offset,
                                                            8,
                                                            u4_pred_strd,
                                                            ps_dec->u2_picture_width << ps_dec->u2_field_dct,
                                                            ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
                }
            }

        }

        /* Convert the luma byte offsets to chroma byte offsets. The x       */
        /* offset is halved. The y offset is a row count multiplied by the   */
        /* luma stride, so halving both the row count and the stride means   */
        /* dividing it by 4.                                                 */
        u4_x_dst_offset >>= 1;
        u4_y_dst_offset >>= 2;


        /* In case of chrominance blocks the DCT will be frame DCT */
        /* cbp bit 0x02 flags the U block, cbp bit 0x01 flags the V block */
        if((ps_dec->u2_cbp & 0x02) != 0)
        {
            pu1_out_p = ps_cur_frm_buf->pu1_u + u4_x_dst_offset + u4_y_dst_offset;
            e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
                          ps_dec->u2_prev_intra_mb, U_CHROMA, 0);
            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
            {
                return e_error;
            }


            IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);

            PROFILE_DISABLE_IDCT_IF0
            {
                WORD32 idx;
                if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
                    idx = 0;
                else
                    idx = 1;

                if(0 == ps_dec->u2_prev_intra_mb)
                {
                    pu1_pred = pu1_out_p;
                    u4_pred_strd = ps_dec->u2_picture_width >> 1;
                }
                else
                {
                    pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
                    u4_pred_strd = 8;
                }

                ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
                                                        ps_dec->ai2_idct_stg1,
                                                        pu1_pred,
                                                        pu1_out_p,
                                                        8,
                                                        u4_pred_strd,
                                                        ps_dec->u2_picture_width >> 1,
                                                        ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);

            }

        }


        if((ps_dec->u2_cbp & 0x01) != 0)
        {
            pu1_out_p = ps_cur_frm_buf->pu1_v + u4_x_dst_offset + u4_y_dst_offset;
            e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
                          ps_dec->u2_prev_intra_mb, V_CHROMA, 0);
            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
            {
                return e_error;
            }


            IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);

            PROFILE_DISABLE_IDCT_IF0
            {
                WORD32 idx;
                if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
                    idx = 0;
                else
                    idx = 1;
                if(0 == ps_dec->u2_prev_intra_mb)
                {
                    pu1_pred = pu1_out_p;
                    u4_pred_strd = ps_dec->u2_picture_width >> 1;
                }
                else
                {
                    pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
                    u4_pred_strd = 8;
                }

                ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
                                                        ps_dec->ai2_idct_stg1,
                                                        pu1_pred,
                                                        pu1_out_p,
                                                        8,
                                                        u4_pred_strd,
                                                        ps_dec->u2_picture_width >> 1,
                                                        ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);

            }
        }


        ps_dec->u2_num_mbs_left--;
        ps_dec->u2_first_mb = 0;
        ps_dec->u2_mb_x++;

        if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)
        {
            return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
        }
        /* Wrap to the next MB row only while at least a start-code prefix   */
        /* worth of bits is still left in the buffer                         */
        else if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
        {
            ps_dec->u2_mb_x = 0;
            ps_dec->u2_mb_y++;

        }
    }
    /* Stop when no MBs are left or the next 23 bits are all zero            */
    /* (presumably the beginning of the next start code)                     */
    while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0);
    return e_error;
}
diff --git a/decoder/impeg2d_structs.h b/decoder/impeg2d_structs.h
new file mode 100755
index 0000000..63a0b03
--- /dev/null
+++ b/decoder/impeg2d_structs.h
@@ -0,0 +1,377 @@
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +#ifndef __IMPEG2D_STRUCTS_H__ +#define __IMPEG2D_STRUCTS_H__ + +typedef enum +{ + CMD_PROCESS, + CMD_FMTCONV, +}e_jobq_cmd_t; + +/** + * Structure to represent a processing job entry + */ +typedef struct +{ + /** + * Command + * Currently: PROCESS, FMTCONV are the only two jobs + */ + WORD32 i4_cmd; + + /** + * MB y of the starting MB + */ + WORD16 i2_start_mb_y; + + /** + * MB y of the last MB + */ + + WORD16 i2_end_mb_y; + + /** + * Bitstream offset for the current job + */ + WORD32 i4_bistream_ofst; + +}job_t; + +typedef struct +{ + /* Params of the reference buffer used as input to MC */ + UWORD32 u4_src_wd; + UWORD32 u4_src_offset; + + /* Params of the buffer where MC output will be written */ + UWORD32 u4_dst_wd_res_buf; + UWORD32 u4_dst_wd_cur_frm; + UWORD32 u4_dst_offset_res_buf; + UWORD32 u4_dst_offset_cur_frm; + + /* Operation Parameters */ + UWORD32 u4_rows; + UWORD32 u4_cols; + UWORD32 u4_mode; +}comp_mc_params_t; + +typedef struct +{ + yuv_buf_t s_ref; + comp_mc_params_t s_luma; + comp_mc_params_t s_chroma; +}mb_mc_params_t; + +struct _dec_mb_params_t; + +typedef UWORD8 pf_inv_quant_t (WORD16 *blk, + UWORD8 *weighting_matrix, + UWORD8 quant_scale, + WORD32 intra_flag, + WORD32 i4_num_coeffs, + WORD16 *pi2_coeffs, + UWORD8 *pu1_pos, + const UWORD8 *scan, + UWORD16 *u2_def_dc_pred, + UWORD16 u2_intra_dc_precision); + +typedef IMPEG2D_ERROR_CODES_T pf_vld_inv_quant_t (void *dec, + WORD16 *out_addr, + const UWORD8 *scan, + UWORD16 intra_flag, + UWORD16 colr_comp, + UWORD16 d_picture); + +typedef void pf_mc_t(void *, UWORD8 *, UWORD32 , UWORD8 *, UWORD32 , + UWORD32 , UWORD32 ); + +typedef struct dec_state_struct_t +{ + WORD16 ai2_vld_buf[NUM_PELS_IN_BLOCK]; + WORD16 ai2_idct_stg1[NUM_PELS_IN_BLOCK]; + + + UWORD8 au1_intra_quant_matrix[NUM_PELS_IN_BLOCK]; + UWORD8 au1_inter_quant_matrix[NUM_PELS_IN_BLOCK]; + + IMPEG2D_ERROR_CODES_T (*pf_decode_slice)(struct dec_state_struct_t *); + + pf_vld_inv_quant_t *pf_vld_inv_quant; + + 
pf_idct_recon_t *pf_idct_recon[4]; + + pf_mc_t *pf_mc[4]; + pf_interpred_t *pf_fullx_halfy_8x8; + pf_interpred_t *pf_halfx_fully_8x8; + pf_interpred_t *pf_halfx_halfy_8x8; + pf_interpred_t *pf_fullx_fully_8x8; + + + pf_interpolate_t *pf_interpolate; + pf_copy_mb_t *pf_copy_mb; + + pf_memset0_one_16bit_buf_t *pf_memset_16bit_8x8_linear_block; + pf_memset_8bit_t *pf_memset_8bit_8x8_block; + pf_copy_yuv420p_buf_t *pf_copy_yuv420p_buf; + pf_fmt_conv_yuv420p_to_yuv422ile_t *pf_fmt_conv_yuv420p_to_yuv422ile; + pf_fmt_conv_yuv420p_to_yuv420sp_t *pf_fmt_conv_yuv420p_to_yuv420sp_uv; + pf_fmt_conv_yuv420p_to_yuv420sp_t *pf_fmt_conv_yuv420p_to_yuv420sp_vu; + + stream_t s_bit_stream; +/* @ */ + + UWORD16 u2_is_mpeg2; /* 0 if stream is MPEG1 1 otherwise */ + UWORD16 u2_frame_width; /* Width of the frame */ + UWORD16 u2_frame_height; /* Height of the frame */ + UWORD16 u2_picture_width; + UWORD16 u2_horizontal_size; + UWORD16 u2_vertical_size; + UWORD16 u2_create_max_width; + UWORD16 u2_create_max_height; + UWORD16 u2_reinit_max_width; + UWORD16 u2_reinit_max_height; + UWORD16 u2_header_done; + UWORD16 u2_decode_header; + + UWORD16 u2_mb_x; + UWORD16 u2_mb_y; + UWORD16 u2_num_horiz_mb; + UWORD16 u2_num_vert_mb; + UWORD16 u2_num_flds_decoded; + void *pv_pic_buf_mg; + + UWORD32 u4_frm_buf_stride; /* for display Buffer */ + + UWORD16 u2_field_dct; + UWORD16 u2_read_dct_type; + + UWORD16 u2_read_motion_type; + UWORD16 u2_motion_type; + + const UWORD16 *pu2_mb_type; + UWORD16 u2_fld_pic; + UWORD16 u2_frm_pic; + + yuv_buf_t s_cur_frm_buf; + + UWORD16 u2_fld_parity; + UWORD16 u2_def_dc_pred[MAX_COLR_COMPS]; + + /* Variables related to Motion Vector predictors */ + + WORD16 ai2_pred_mv[2][2][2]; + e_pred_direction_t e_mb_pred; + UWORD16 au2_fcode_data[2]; + + /* Variables related to reference pictures */ + yuv_buf_t as_recent_fld[2][2]; + + UWORD8 u1_quant_scale; + UWORD16 u2_num_mbs_left; + UWORD16 u2_first_mb; + UWORD16 u2_num_skipped_mbs; + + UWORD8 *pu1_inv_scan_matrix; + + UWORD16 
u2_progressive_sequence; + e_pic_type_t e_pic_type; + + UWORD16 u2_full_pel_forw_vector; + UWORD16 u2_forw_f_code; + UWORD16 u2_full_pel_back_vector; + UWORD16 u2_back_f_code; + + WORD16 ai2_mv[2][2][2]; /* Motion vectors */ + + /* Bitstream code present in Picture coding extension */ + UWORD16 au2_f_code[2][2]; + UWORD16 u2_intra_dc_precision; + UWORD16 u2_picture_structure; + UWORD16 u2_top_field_first; + UWORD16 u2_frame_pred_frame_dct; + UWORD16 u2_concealment_motion_vectors; + UWORD16 u2_q_scale_type; + UWORD16 u2_intra_vlc_format; + UWORD16 u2_alternate_scan; + UWORD16 u2_repeat_first_field; + UWORD16 u2_progressive_frame; + + + /* Bitstream code related to frame rate of the bitstream */ + UWORD16 u2_frame_rate_code; + UWORD16 u2_frame_rate_extension_n; + UWORD16 u2_frame_rate_extension_d; + UWORD16 u2_framePeriod; /* Frame period in milli seconds */ + + /* Members related to display dimensions of bitstream */ + /* The size values may not be returned right now. But they are read */ + /* and can be returned if there is a requirement. 
*/ + UWORD16 u2_display_horizontal_size; + UWORD16 u2_display_vertical_size; + UWORD16 u2_aspect_ratio_info; + + /* Members related to motion compensation */ + yuv_buf_t s_mc_fw_buf; + yuv_buf_t s_mc_bk_buf; + yuv_buf_t s_mc_buf; + mb_mc_params_t as_mb_mc_params[2][2]; + yuv_buf_t as_ref_buf[2][2]; + e_mb_type_t s_mb_type; + + yuv_buf_t s_dest_buf; + + /* Variable to handle intra MB */ + UWORD16 u2_prev_intra_mb; + UWORD16 u2_coded_mb; + + /* Bidirect function pointers */ + const struct _dec_mb_params_t *ps_func_bi_direct; + + /* Forw or Back function pointers */ + const struct _dec_mb_params_t *ps_func_forw_or_back; + + + /* CBP of the current MB */ + UWORD16 u2_cbp; + void *pv_video_scratch; + + + /* For global error handling */ + void *pv_stack_cntxt; + +/* @ */ + WORD32 i4_chromaFormat; + UWORD32 u4_xdmBufID; + UWORD32 u4_num_mem_records; + /* For holding memRecords */ + void *pv_memTab; + + UWORD8 u1_flushfrm; + UWORD8 u1_flushcnt; + iv_yuv_buf_t as_frame_buf[MAX_FRAME_BUFFER]; + iv_yuv_buf_t ps_yuv_buf; + + ivd_get_display_frame_op_t s_disp_op; + + + UWORD32 u4_non_zero_cols; + UWORD32 u4_non_zero_rows; + + UWORD32 u4_num_frames_decoded; + + /* Adding error code variable to signal benign errors. 
*/ + UWORD32 u4_error_code; + + WORD32 i4_num_cores; + + UWORD8 u1_first_frame_done; + + void *pv_codec_thread_handle; + void *ps_dec_state_multi_core; + UWORD32 u4_inp_ts; + pic_buf_t *ps_cur_pic; + pic_buf_t *ps_disp_pic; + pic_buf_t *aps_ref_pics[2]; + + WORD32 i4_disp_buf_id; + WORD32 i4_cur_buf_id; + iv_yuv_buf_t *ps_disp_frm_buf; + + UWORD32 u4_share_disp_buf; + void *pv_pic_buf_base; + + disp_mgr_t s_disp_mgr; + UWORD8 *pu1_chroma_ref_buf[BUF_MGR_MAX_CNT]; + ivd_out_bufdesc_t as_disp_buffers[BUF_MGR_MAX_CNT]; + + /* Flag to signal last coeff in a 8x8 block is one + after mismatch control */ + WORD32 i4_last_value_one; + + WORD32 i4_start_mb_y; + WORD32 i4_end_mb_y; + + /** + * Job queue buffer base + */ + void *pv_jobq_buf; + + /** + * Job Queue mem tab size + */ + WORD32 i4_jobq_buf_size; + + /** + * Job Queue context + */ + void *pv_jobq; + + /* Pointer to input bitstream */ + UWORD8 *pu1_inp_bits_buf; + + /* Number of bytes in the input bitstream */ + UWORD32 u4_num_inp_bytes; + + /* Bytes consumed */ + WORD32 i4_bytes_consumed; + + IVD_ARCH_T e_processor_arch; + + IVD_SOC_T e_processor_soc; + + WORD32 i4_frame_decoded; + +}dec_state_t; + + + + /* func_decmb_params and mc_funcs are callback types that receive the decoder state; dec_mb_params_t groups one callback of each type with an e_mb_type_t value */ +typedef void (*func_decmb_params)(dec_state_t *); +typedef void (*mc_funcs)(dec_state_t *); +typedef struct _dec_mb_params_t +{ + func_decmb_params pf_func_mb_params; + e_mb_type_t s_mb_type; + mc_funcs pf_mc; +}dec_mb_params_t; + + + +#define MAX_THREADS 4 + + +#define MAX_MB_ROWS (MAX_HEIGHT / 16) // number of rows for 1080p + +typedef struct _dec_state_multi_core +{ + // contains the decoder state of decoder for each thread + dec_state_t *ps_dec_state[MAX_THREADS]; + UWORD32 au4_thread_launched[MAX_THREADS]; + // number of rows: first thread will populate the row offsets and update + // row_offset_cnt. 
Other threads should pick up offset from this thread + // and start decoding + UWORD32 au4_row_offset[MAX_MB_ROWS]; + volatile UWORD32 u4_row_offset_cnt; +}dec_state_multi_core_t; + + + +#endif /* #ifndef __IMPEG2D_STRUCTS_H__ */ diff --git a/decoder/impeg2d_vld.c b/decoder/impeg2d_vld.c new file mode 100644 index 0000000..972f42a --- /dev/null +++ b/decoder/impeg2d_vld.c @@ -0,0 +1,1183 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" + + +/******************************************************************************* +* Function name : impeg2d_dec_vld_symbol +* +* Description : Performs decoding of VLD symbol. It performs decoding by +* processing 1 bit at a time +* +* Arguments : +* stream : Bitstream +* ai2_code_table : Table used for decoding +* maxLen : Maximum Length of the decoded symbol in bits +* +* Value Returned: Decoded symbol +*******************************************************************************/ +WORD16 impeg2d_dec_vld_symbol(stream_t *ps_stream,const WORD16 ai2_code_table[][2], UWORD16 u2_max_len) +{ + UWORD16 u2_data; + WORD16 u2_end = 0; + UWORD16 u2_org_max_len = u2_max_len; + UWORD16 u2_i_bit; + + /* Get the maximum number of bits needed to decode a symbol */ + u2_data = impeg2d_bit_stream_nxt(ps_stream,u2_max_len); + do + { + u2_max_len--; + /* Read one bit at a time from the variable to decode the huffman code */ + u2_i_bit = (UWORD8)((u2_data >> u2_max_len) & 0x1); + + /* Get the next node pointer or the symbol from the tree */ + u2_end = ai2_code_table[u2_end][u2_i_bit]; + }while(u2_end > 0); + + /* Flush the appropriate number of bits from the ps_stream */ + impeg2d_bit_stream_flush(ps_stream,(UWORD8)(u2_org_max_len - u2_max_len)); + return(u2_end); +} +/******************************************************************************* +* Function name : 
impeg2d_fast_dec_vld_symbol
+*
+* Description   : Decodes one variable-length symbol with multi-level lookup
+*                 tables, consuming several bits per lookup.
+*
+* Arguments     :
+* ps_stream      : Bitstream to read from
+* ai2_code_table : Lookup entries. Column 0 is the number of bits to flush
+*                  for a decoded symbol, or 0 when another lookup level is
+*                  needed. Column 1 is the decoded symbol, or - when column 0
+*                  is 0 - the row of au2_indexTable describing the next level.
+* au2_indexTable : Per level: column 0 is the number of bits indexed at that
+*                  level, column 1 the offset of that level inside
+*                  ai2_code_table. Row 0 describes the first level.
+* u2_max_len     : Number of bits fetched up front
+*
+* Value Returned: Decoded symbol
+*******************************************************************************/
+WORD16 impeg2d_fast_dec_vld_symbol(stream_t *ps_stream,
+                     const WORD16  ai2_code_table[][2],
+                     const UWORD16 au2_indexTable[][2],
+                     UWORD16 u2_max_len)
+{
+    UWORD16 u2_window    = impeg2d_bit_stream_nxt(ps_stream, u2_max_len);
+    UWORD16 u2_chunk_len = au2_indexTable[0][0];
+    UWORD16 u2_tbl_base  = 0;
+
+    for(;;)
+    {
+        /* Bits of the window that lie below the chunk indexed at this level */
+        const WORD32  i4_bits_below = u2_max_len - u2_chunk_len;
+        const UWORD16 u2_chunk      = u2_window >> i4_bits_below;
+        const WORD16  *pi2_entry    = ai2_code_table[u2_chunk + u2_tbl_base];
+        const UWORD16 u2_code_len   = pi2_entry[0];
+
+        if(0 != u2_code_len)
+        {
+            /* Leaf entry: flush the stored length and hand back the symbol */
+            impeg2d_bit_stream_flush(ps_stream, u2_code_len);
+            return pi2_entry[1];
+        }
+
+        /* Escape entry: drop the chunk just used and descend one level */
+        u2_window   &= ((1 << i4_bits_below) - 1);
+        u2_max_len   = (UWORD16)i4_bits_below;
+        u2_chunk_len = au2_indexTable[pi2_entry[1]][0];
+        u2_tbl_base  = au2_indexTable[pi2_entry[1]][1];
+    }
+}
+/******************************************************************************
+*
+*  Function Name   : impeg2d_dec_ac_coeff_zero
+*
+*  Description     : Decodes using Table B.14
+*
+*  Arguments       :
+*  ps_stream        : Bitstream to read from
+*  pu2_sym_len      : Receives the length of the decoded code in bits
+*  pu2_sym_val      : Receives the decoded symbol value
+*
+*  Values Returned : Decoded value
+*
+*  Revision History:
+*
+*       28 02 2002  AR        Creation
+*******************************************************************************/
+UWORD16 impeg2d_dec_ac_coeff_zero(stream_t *ps_stream, UWORD16* pu2_sym_len, UWORD16* 
pu2_sym_val) +{ + UWORD16 u2_offset,u2_decoded_value; + UWORD8 u1_shift; + UWORD32 u4_bits_read; + /* Only impeg2d_bit_stream_nxt is called here; this function does not flush any bits itself */ + u4_bits_read = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,MPEG2_AC_COEFF_MAX_LEN); + + if ((UWORD16)u4_bits_read >= 0x0800) + { + u2_offset = (UWORD16)u4_bits_read >> 11; + } + else if ((UWORD16)u4_bits_read >= 0x40) + { + u2_offset = 31 + ((UWORD16)u4_bits_read >> 6); + } + else if ((UWORD16)u4_bits_read >= 0x20) + { + u2_offset = 64; + } + else + { + u2_offset = 63; + u4_bits_read = (UWORD16)u4_bits_read - 0x10; + } + /*----------------------------------------------------------------------- + * Each gau2_impeg2d_offset_zero entry packs two fields: the upper bits + * (entry >> 4) are the offset of the group to which the Vld code belongs + * in the Ac Coeff Table, the low nibble (entry & 0xF) is the number of + * bits by which the bits read are shifted right + *-----------------------------------------------------------------------*/ + u2_offset = gau2_impeg2d_offset_zero[u2_offset]; + u1_shift = u2_offset & 0xF; + + /*----------------------------------------------------------------------- + * Depending upon the vld code, we index exactly to that particular + * Vld code's value in the Ac Coeff Table. + * (u2_offset >> 4) gives the offset for the group in the Ac Coeff Table. 
+ * (u4_bits_read >> u1_shift) gives the offset within its group + *-----------------------------------------------------------------------*/ + u2_offset = (u2_offset >> 4) + ((UWORD16)u4_bits_read >> u1_shift); + /*----------------------------------------------------------------------- + * u2_decoded_value packs the code length in its low 5 bits and the + * symbol value in the bits above them + *-----------------------------------------------------------------------*/ + u2_decoded_value = gau2_impeg2d_dct_coeff_zero[u2_offset]; + if(u2_decoded_value == END_OF_BLOCK) + { + *pu2_sym_len = 2; + *pu2_sym_val = EOB_CODE_VALUE; + } + else if(u2_decoded_value == ESCAPE_CODE) + { + *pu2_sym_len = u2_decoded_value & 0x1F; + *pu2_sym_val = ESC_CODE_VALUE; + } + else + { + *pu2_sym_len = u2_decoded_value & 0x1F; + *pu2_sym_val = u2_decoded_value >> 5; + } + return(u2_decoded_value); +} + +/****************************************************************************** +* +* Function Name : impeg2d_dec_ac_coeff_one +* +* Description : Decodes using Table B.15 +* +* Arguments : +* ps_stream : Bitstream to read from (bits are only peeked here) +* pu2_sym_len : Receives the length of the decoded code in bits +* pu2_sym_val : Receives EOB_CODE_VALUE, ESC_CODE_VALUE or the table +* value shifted right by 5 +* +* Values Returned : Raw table entry for the decoded code +* +* Revision History: +* +* 28 02 2002 AR Creation +*******************************************************************************/ +UWORD16 impeg2d_dec_ac_coeff_one(stream_t *ps_stream, UWORD16* pu2_sym_len, UWORD16* pu2_sym_val) +{ + UWORD16 u2_offset, u2_decoded_value; + UWORD8 u1_shift; + UWORD32 u4_bits_read; + + + u4_bits_read = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,MPEG2_AC_COEFF_MAX_LEN); + + if ((UWORD16)u4_bits_read >= 0x8000) + { + /* If the MSB of the vld code is 1 */ + if (((UWORD16)u4_bits_read >> 12) == 0xF) + u2_offset = ((UWORD16)u4_bits_read >> 8) & 0xF; + else + u2_offset = (UWORD16)u4_bits_read >> 11; + u2_offset += 
gau2_impeg2d_offset_one[0]; + } + else if ((UWORD16)u4_bits_read >= 0x400) + { + u2_offset =(UWORD16) u4_bits_read >> 10; + u2_offset = gau2_impeg2d_offset_one[u2_offset]; + u1_shift = u2_offset & 0xF; + u2_offset = (u2_offset >> 
4) + ((UWORD16)u4_bits_read >> u1_shift); + } + else if ((UWORD16)u4_bits_read >= 0x20) + { + u2_offset = ((UWORD16)u4_bits_read >> 5) + 31; + u2_offset = gau2_impeg2d_offset_one[u2_offset]; + u1_shift = u2_offset & 0xF; + u2_offset = (u2_offset >> 4) + ((UWORD16)u4_bits_read >> u1_shift); + } + else + { + u2_offset = gau2_impeg2d_offset_one[63] + ((UWORD16)u4_bits_read & 0xF); + } + /*----------------------------------------------------------------------- + * u2_decoded_value packs the code length in its low 5 bits and the + * symbol value in the bits above them + *-----------------------------------------------------------------------*/ + u2_decoded_value = gau2_impeg2d_dct_coeff_one[u2_offset]; + + if(u2_decoded_value == END_OF_BLOCK) + { + *pu2_sym_len = 4; + *pu2_sym_val = EOB_CODE_VALUE; + } + else if(u2_decoded_value == ESCAPE_CODE) + { + *pu2_sym_len = u2_decoded_value & 0x1F; + *pu2_sym_val = ESC_CODE_VALUE; + } + else + { + *pu2_sym_len = u2_decoded_value & 0x1F; + *pu2_sym_val = u2_decoded_value >> 5; + } + + return(u2_decoded_value); +} + +/****************************************************************************** + * + * Function Name : impeg2d_vld_inv_quant_mpeg1 + * + * Description : Decodes one block with impeg2d_vld_decode and inverse + * quantizes the result into pi2_out_addr with + * impeg2d_inv_quant_mpeg1 + * + * Arguments : see the parameter comments in the signature below + * + * Values Returned : Error code reported by impeg2d_vld_decode; + * IVD_ERROR_NONE when the block was decoded + ******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_vld_inv_quant_mpeg1( + void *pv_dec, /* Decoder State */ + WORD16 *pi2_out_addr, /*!< Address where decoded symbols will be stored */ + const UWORD8 *pu1_scan, /*!< Scan table to be used */ + UWORD16 u2_intra_flag, /*!< Intra Macroblock or not */ + UWORD16 u2_colr_comp, /*!< 0 - Luma,1 - U comp, 2 - V comp */ + UWORD16 u2_d_picture /*!< D Picture or not */ + ) 
+{ + UWORD8 *pu1_weighting_matrix; + dec_state_t *ps_dec = (dec_state_t *) pv_dec; + IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; + + WORD16 
pi2_coeffs[NUM_COEFFS]; + UWORD8 pu1_pos[NUM_COEFFS]; + WORD32 i4_num_coeffs; + + /* Perform VLD on the stream to get the coefficients and their positions */ + e_error = impeg2d_vld_decode(ps_dec, pi2_coeffs, pu1_scan, pu1_pos, u2_intra_flag, + u2_colr_comp, u2_d_picture, ps_dec->u2_intra_vlc_format, + ps_dec->u2_is_mpeg2, &i4_num_coeffs); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + /* For YUV420 format,Select the weighting matrix according to Table 7.5 */ + pu1_weighting_matrix = (u2_intra_flag == 1) ? ps_dec->au1_intra_quant_matrix: + ps_dec->au1_inter_quant_matrix; + + IMPEG2D_IQNT_INP_STATISTICS(pi2_out_addr, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows); + /* Inverse Quantize the Output of VLD */ + PROFILE_DISABLE_INVQUANT_IF0 + + { + /* Clear output matrix, unless the non-zero masks flag only row 0 / column 0 */ + PROFILE_DISABLE_MEMSET_RESBUF_IF0 + if (1 != (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows)) + { + ps_dec->pf_memset_16bit_8x8_linear_block (pi2_out_addr); + } + + impeg2d_inv_quant_mpeg1(pi2_out_addr, pu1_weighting_matrix, + ps_dec->u1_quant_scale, u2_intra_flag, + i4_num_coeffs, pi2_coeffs, pu1_pos, + pu1_scan, &ps_dec->u2_def_dc_pred[u2_colr_comp], + ps_dec->u2_intra_dc_precision); + + if (0 != pi2_out_addr[0]) + { + /* The first coeff might've become non-zero once the DC predictor + * was added. So, check here after inverse quantization. 
+ */ + ps_dec->u4_non_zero_cols |= 0x1; + ps_dec->u4_non_zero_rows |= 0x1; + } + } + + return e_error; +} + +/****************************************************************************** + * + * Function Name : impeg2d_vld_inv_quant_mpeg2 + * + * Description : Decodes one block with impeg2d_vld_decode, inverse + * quantizes it with impeg2d_inv_quant_mpeg2 and applies + * MPEG-2 mismatch control + * + * Arguments : see the parameter comments in the signature below + * + * Values Returned : Error code reported by impeg2d_vld_decode; + * IVD_ERROR_NONE when the block was decoded + ******************************************************************************/ +IMPEG2D_ERROR_CODES_T impeg2d_vld_inv_quant_mpeg2( + void *pv_dec, /* Decoder State */ + WORD16 *pi2_out_addr, /*!< Address where decoded symbols will be stored */ + const UWORD8 *pu1_scan, /*!< Scan table to be used */ + UWORD16 u2_intra_flag, /*!< Intra Macroblock or not */ + UWORD16 u2_colr_comp, /*!< 0 - Luma,1 - U comp, 2 - V comp */ + UWORD16 u2_d_picture /*!< D Picture or not */ + ) +{ + UWORD8 *pu1_weighting_matrix; + WORD32 u4_sum_is_even; + dec_state_t *ps_dec = (dec_state_t *)pv_dec; + IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE; + + WORD16 pi2_coeffs[NUM_COEFFS]; + UWORD8 pi4_pos[NUM_COEFFS]; + WORD32 i4_num_coeffs; + + /* Perform VLD on the stream to get the coefficients and their positions */ + e_error = impeg2d_vld_decode(ps_dec, pi2_coeffs, pu1_scan, pi4_pos, u2_intra_flag, + u2_colr_comp, u2_d_picture, ps_dec->u2_intra_vlc_format, + ps_dec->u2_is_mpeg2, &i4_num_coeffs); + if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error) + { + return e_error; + } + + /* For YUV420 format,Select the weighting matrix according to Table 7.5 */ + pu1_weighting_matrix = (u2_intra_flag == 1) ? 
ps_dec->au1_intra_quant_matrix:
+                           ps_dec->au1_inter_quant_matrix;
+
+    /*mismatch control for mpeg2*/
+    /* Check if the block has only one non-zero coeff which is DC  */
+    ps_dec->i4_last_value_one = 0;
+
+    IMPEG2D_IQNT_INP_STATISTICS(pi2_out_addr, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+    /* Inverse Quantize the Output of VLD */
+    PROFILE_DISABLE_INVQUANT_IF0
+
+    {
+        /* Clear output matrix */
+        PROFILE_DISABLE_MEMSET_RESBUF_IF0
+        if (1 != (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+        {
+            ps_dec->pf_memset_16bit_8x8_linear_block (pi2_out_addr);
+        }
+
+        u4_sum_is_even  = impeg2d_inv_quant_mpeg2(pi2_out_addr, pu1_weighting_matrix,
+                                                 ps_dec->u1_quant_scale, u2_intra_flag,
+                                                 i4_num_coeffs, pi2_coeffs,
+                                                 pi4_pos, pu1_scan,
+                                                 &ps_dec->u2_def_dc_pred[u2_colr_comp],
+                                                 ps_dec->u2_intra_dc_precision);
+
+        if (0 != pi2_out_addr[0])
+        {
+            /* The first coeff might've become non-zero due to intra_dc_decision
+             * value. So, check here after inverse quantization.
+             */
+            ps_dec->u4_non_zero_cols  |= 0x1;
+            ps_dec->u4_non_zero_rows  |= 0x1;
+        }
+
+        if (1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+        {
+            ps_dec->i4_last_value_one = 1 - (pi2_out_addr[0] & 1);
+        }
+        else
+        {
+            /*toggle last bit if sum is even ,else retain it as it is*/
+            pi2_out_addr[63]        ^= (u4_sum_is_even & 1);
+
+            if (0 != pi2_out_addr[63])
+            {
+                ps_dec->u4_non_zero_cols  |= 0x80;
+                ps_dec->u4_non_zero_rows  |= 0x80;
+            }
+        }
+    }
+
+    return e_error;
+}
+
+
+/******************************************************************************
+*
+*  Function Name   : impeg2d_vld_decode
+*
+*  Description     : Variable-length decodes the coefficients of one 8x8 block
+*                    (MPEG-1 or MPEG-2). Every decoded coefficient is appended
+*                    to pi2_outAddr and its position in the block, looked up
+*                    through pu1_scan, is appended to pu1_pos. Bit masks of the
+*                    columns and rows that received a coefficient are stored in
+*                    ps_dec->u4_non_zero_cols and ps_dec->u4_non_zero_rows.
+*
+*  Arguments       : see the parameter comments in the signature below.
+*                    pi2_outAddr and pu1_pos must each have room for 64
+*                    entries; at most 64 are ever written.
+*
+*  Values Returned : IVD_ERROR_NONE when the block was decoded.
+*                    IMPEG2D_MB_TEX_DECODE_ERR when the stream places a
+*                    coefficient beyond the 64th position of the block. In that
+*                    case the local stream state is not written back to
+*                    ps_dec->s_bit_stream and both non-zero masks stay 0.
+******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_vld_decode(
+    dec_state_t *ps_dec,
+    WORD16      *pi2_outAddr,           /*!< Address where decoded symbols will be stored */
+    const UWORD8 *pu1_scan,             /*!< Scan table to be used */
+    UWORD8      *pu1_pos,               /*!< Receives the block position of every decoded coeff */
+    UWORD16     u2_intra_flag,          /*!< Intra Macroblock or not */
+    UWORD16     u2_chroma_flag,         /*!< Chroma Block or not */
+    UWORD16     u2_d_picture,           /*!< D Picture or not */
+    UWORD16     u2_intra_vlc_format,    /*!< Intra VLC format */
+    UWORD16     u2_mpeg2,               /*!< MPEG-2 or not */
+    WORD32      *pi4_num_coeffs         /*!< Returns the number of coeffs in block */
+    )
+{
+
+    UWORD32 u4_sym_len;
+
+    UWORD32 u4_decoded_value;
+    UWORD32 u4_level_first_byte;
+    WORD32  u4_level;
+    UWORD32 u4_run, u4_numCoeffs;
+    UWORD32 u4_buf;
+    UWORD32 u4_buf_nxt;
+    UWORD32 u4_offset;
+    UWORD32 *pu4_buf_aligned;
+    UWORD32 u4_bits;
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    WORD32  u4_pos;
+    UWORD32 u4_nz_cols;
+    UWORD32 u4_nz_rows;
+
+    *pi4_num_coeffs = 0;
+
+    /* The masks are accumulated locally and stored back on success only */
+    ps_dec->u4_non_zero_cols = 0;
+    ps_dec->u4_non_zero_rows = 0;
+    u4_nz_cols = 0;
+    u4_nz_rows = 0;
+
+    GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,ps_stream)
+    /**************************************************************************/
+    /* Decode the DC coefficient in case of Intra block                       */
+    /**************************************************************************/
+    if(u2_intra_flag)
+    {
+        WORD32 dc_size;
+        WORD32 dc_diff;
+        WORD32 maxLen;
+        WORD32 idx;
+
+        /* Chroma blocks use the second dc_size table, whose codes are one
+         * bit longer */
+        maxLen = MPEG2_DCT_DC_SIZE_LEN;
+        idx = 0;
+        if(u2_chroma_flag != 0)
+        {
+            maxLen += 1;
+            idx++;
+        }
+
+        {
+            WORD16  end = 0;
+            UWORD32 maxLen_tmp = maxLen;
+            UWORD16 m_iBit;
+
+            /* Get the maximum number of bits needed to decode a symbol */
+            IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,maxLen)
+            do
+            {
+                maxLen_tmp--;
+                /* Read one bit at a time from the variable to decode the huffman code */
+                m_iBit = (UWORD8)((u4_bits >> maxLen_tmp) & 0x1);
+
+                /* Get the next node pointer or the symbol from the tree */
+                end = gai2_impeg2d_dct_dc_size[idx][end][m_iBit];
+            }while(end > 0);
+            dc_size = end + MPEG2_DCT_DC_SIZE_OFFSET;
+
+            /* Flush the appropriate number of bits from the stream */
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,(maxLen - maxLen_tmp),pu4_buf_aligned)
+        }
+
+        if (dc_size != 0)
+        {
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned, dc_size)
+            dc_diff = u4_bits;
+
+            /* A cleared top bit marks a negative differential: map the
+             * dc_size-bit value v to v - (2^dc_size - 1) */
+            if ((dc_diff & (1 << (dc_size - 1))) == 0)
+            {
+                dc_diff -= (1 << dc_size) - 1;
+            }
+        }
+        else
+        {
+            dc_diff = 0;
+        }
+
+        pi2_outAddr[*pi4_num_coeffs] = dc_diff;
+        /* This indicates the position of the coefficient. Since this is the DC
+         * coefficient, we put the position as 0.
+         */
+        pu1_pos[*pi4_num_coeffs] = pu1_scan[0];
+        (*pi4_num_coeffs)++;
+
+        if (0 != dc_diff)
+        {
+            u4_nz_cols |= 0x01;
+            u4_nz_rows |= 0x01;
+        }
+
+        u4_numCoeffs = 1;
+    }
+    /**************************************************************************/
+    /* Decoding of first AC coefficient in case of non Intra block            */
+    /**************************************************************************/
+    else
+    {
+        /* A leading '1' bit is a complete code for the first coefficient:
+         * run 0, magnitude 1, with the following bit as the sign */
+        IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,1)
+
+        if(u4_bits == 1)
+        {
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,1, pu4_buf_aligned)
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned, 1)
+            if(u4_bits == 1)
+            {
+                pi2_outAddr[*pi4_num_coeffs] = -1;
+            }
+            else
+            {
+                pi2_outAddr[*pi4_num_coeffs] = 1;
+            }
+
+            /* The first coefficient goes to the first scan position */
+            pu1_pos[*pi4_num_coeffs] = pu1_scan[0];
+            (*pi4_num_coeffs)++;
+            u4_numCoeffs = 1;
+
+            u4_nz_cols |= 0x01;
+            u4_nz_rows |= 0x01;
+        }
+        else
+        {
+            u4_numCoeffs = 0;
+        }
+    }
+
+    /* Nothing is decoded beyond the first coefficient for D pictures */
+    if (1 == u2_d_picture)
+    {
+        PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)
+        ps_dec->u4_non_zero_cols  = u4_nz_cols;
+        ps_dec->u4_non_zero_rows  = u4_nz_rows;
+        return ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE);
+    }
+
+    if (1 == u2_intra_vlc_format && u2_intra_flag)
+    {
+        /* Inlined form of impeg2d_dec_ac_coeff_one */
+        while(1)
+        {
+            UWORD32 lead_zeros;
+            WORD16 DecodedValue;
+
+            u4_sym_len = 17;
+            IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,u4_sym_len)
+
+            /* First lookup, indexed by the leading 9 of the 17 peeked bits */
+            DecodedValue = gau2_impeg2d_tab_one_1_9[u4_bits >> 8];
+            u4_sym_len = (DecodedValue & 0xf);
+            u4_level = DecodedValue >> 9;
+
+            if(0 != u4_level)
+            {
+                u4_run = ((DecodedValue >> 4) & 0x1f);
+                FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+            }
+            else if (DecodedValue == END_OF_BLOCK_ONE)
+            {
+                /* The 4 bit end-of-block code is consumed after the loop */
+                u4_sym_len = 4;
+                break;
+            }
+            else
+            {
+                /* u4_bits holds 17 valid bits. Subtracting 20 discounts the
+                 * unused upper bits of the 32-bit word plus the five zeros
+                 * every code reaching this point starts with (assuming CLZ
+                 * counts leading zeros of a 32-bit value - TODO confirm).
+                 * NOTE(review): a window with lead_zeros > 6 would wrap
+                 * (6 - lead_zeros) below; confirm such codes cannot occur. */
+                lead_zeros = CLZ(u4_bits) - 20;
+                if (0 != lead_zeros)
+                {
+                    /* Second lookup: the five bits following the zero run */
+                    u4_bits = (u4_bits >> (6 - lead_zeros)) & 0x001F;
+
+                    /* Length of the code to flush */
+                    if (1 == lead_zeros)
+                    {
+                        u4_sym_len = ((u4_bits & 0x18) >> 3) == 2 ? 11:10;
+                    }
+                    else
+                    {
+                        u4_sym_len = 11 + lead_zeros;
+                    }
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+
+                    /* 32 table entries per leading-zero count */
+                    u4_bits = ((lead_zeros - 1) << 5) + u4_bits;
+
+                    DecodedValue = gau2_impeg2d_tab_one_10_16[u4_bits];
+
+                    u4_run = BITS(DecodedValue, 8,4);
+                    u4_level = ((WORD16) DecodedValue) >> 9;
+                }
+                /*********************************************************************/
+                /* MPEG2 Escape Code                                                 */
+                /*********************************************************************/
+                else if(u2_mpeg2 == 1)
+                {
+                    /* 6 bit escape code, then 6 bits of run and a 12 bit
+                     * two's complement level */
+                    u4_sym_len = 6;
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,18)
+                    u4_decoded_value = u4_bits;
+                    u4_run = (u4_decoded_value >> 12);
+                    u4_level = (u4_decoded_value & 0x0FFF);
+
+                    if (u4_level)
+                    {
+                        u4_level = (u4_level - ((u4_level & 0x0800) << 1));
+                    }
+                }
+                /*********************************************************************/
+                /* MPEG1 Escape Code                                                 */
+                /*********************************************************************/
+                else
+                {
+                    /*-----------------------------------------------------------
+                    * MPEG-1 Stream
+                    *
+                    * <See D.9.3 of MPEG-2> Run-level escape syntax
+                    * Run-level values that cannot be coded with a VLC are coded
+                    * by the escape code '0000 01' followed by
+                    * either a 14-bit FLC (-127 <= level <= 127),
+                    * or a 22-bit FLC (-255 <= level <= 255).
+                    * This is described in Annex B,B.5f of MPEG-1.standard
+                    *
+                    * The first 6 bits are the run. The next 8 bits select the
+                    * form: '10000000' or '00000000' means a 22-bit FLC whose
+                    * level follows in 8 more bits, anything else is the level
+                    * of a 14-bit FLC.
+                    *-----------------------------------------------------------*/
+                    u4_sym_len = 6;
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,14)
+                    u4_decoded_value = u4_bits;
+                    u4_run = (u4_decoded_value >> 8);
+                    u4_level_first_byte = (u4_decoded_value & 0x0FF);
+                    if(u4_level_first_byte & 0x7F)
+                    {
+                        /* 14-bit FLC: sign extend the 8 bit level */
+                        u4_level = (u4_level_first_byte -
+                                        ((u4_level_first_byte & 0x80) << 1));
+                    }
+                    else
+                    {
+                        /* 22-bit FLC: second byte, minus 256 when the first
+                         * byte was '10000000' */
+                        IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,8)
+                        u4_level = u4_bits;
+                        u4_level = (u4_level - (u4_level_first_byte << 1));
+                    }
+                }
+            }
+
+            /* Skip the zero run. Reject the block before storing anything if
+             * the coefficient would land outside the 64 positions of an 8x8
+             * block. The number of stored coefficients never exceeds
+             * u4_numCoeffs, so this check also bounds the writes below. */
+            u4_numCoeffs += u4_run;
+            if (u4_numCoeffs >= 64)
+            {
+                return IMPEG2D_MB_TEX_DECODE_ERR;
+            }
+
+            u4_pos = pu1_scan[u4_numCoeffs++];
+            pu1_pos[*pi4_num_coeffs] = u4_pos;
+            pi2_outAddr[*pi4_num_coeffs] = u4_level;
+            (*pi4_num_coeffs)++;
+
+            u4_nz_cols |= 1 << (u4_pos & 0x7);
+            u4_nz_rows |= 1 << (u4_pos >> 0x3);
+        }
+
+        /* Consume the end-of-block code */
+        IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,u4_sym_len)
+    }
+    else
+    {
+        /* Inlined decode against the gau2_impeg2d_tab_zero_* tables */
+        while(1)
+        {
+            UWORD32 lead_zeros;
+            UWORD16 DecodedValue;
+
+            u4_sym_len = 17;
+            IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, u4_sym_len)
+
+            /* First lookup, indexed by the leading 9 of the 17 peeked bits */
+            DecodedValue = gau2_impeg2d_tab_zero_1_9[u4_bits >> 8];
+            u4_sym_len = BITS(DecodedValue, 3, 0);
+            u4_level = ((WORD16) DecodedValue) >> 9;
+
+            if (0 != u4_level)
+            {
+                u4_run = BITS(DecodedValue, 8,4);
+                FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+            }
+            else if(DecodedValue == END_OF_BLOCK_ZERO)
+            {
+                /* The 2 bit end-of-block code is consumed after the loop */
+                u4_sym_len = 2;
+                break;
+            }
+            else
+            {
+                /* Same bias as in the loop above: unused upper bits of the
+                 * 32-bit word plus the five zeros common to all codes that
+                 * reach this point (assuming a 32-bit CLZ - TODO confirm) */
+                lead_zeros = CLZ(u4_bits) - 20;
+                /*Second table lookup*/
+                if (0 != lead_zeros)
+                {
+                    u4_bits = (u4_bits >> (6 - lead_zeros)) & 0x001F;
+
+                    /* Length of the code to flush */
+                    u4_sym_len = 11 + lead_zeros;
+                    if (1 == lead_zeros)
+                    {
+                        u4_sym_len--;
+                    }
+
+                    /* 32 table entries per leading-zero count */
+                    u4_bits = ((lead_zeros - 1) << 5) + u4_bits;
+
+                    DecodedValue = gau2_impeg2d_tab_zero_10_16[u4_bits];
+
+                    u4_run = BITS(DecodedValue, 8,4);
+                    u4_level = ((WORD16) DecodedValue) >> 9;
+
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                }
+                /*********************************************************************/
+                /* MPEG2 Escape Code                                                 */
+                /*********************************************************************/
+                else if(u2_mpeg2 == 1)
+                {
+                    /* 6 bit escape code, then 6 bits of run and a 12 bit
+                     * two's complement level */
+                    u4_sym_len = 6;
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,18)
+                    u4_decoded_value = u4_bits;
+                    u4_run = (u4_decoded_value >> 12);
+                    u4_level = (u4_decoded_value & 0x0FFF);
+
+                    if (u4_level)
+                    {
+                        u4_level = (u4_level - ((u4_level & 0x0800) << 1));
+                    }
+                }
+                /*********************************************************************/
+                /* MPEG1 Escape Code                                                 */
+                /*********************************************************************/
+                else
+                {
+                    /*-----------------------------------------------------------
+                    * MPEG-1 escape: 6 bits of run followed by either a 14-bit
+                    * FLC (-127 <= level <= 127) or a 22-bit FLC
+                    * (-255 <= level <= 255). A first level byte of '10000000'
+                    * or '00000000' selects the 22-bit form.
+                    *-----------------------------------------------------------*/
+                    u4_sym_len = 6;
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,14)
+                    u4_decoded_value = u4_bits;
+                    u4_run = (u4_decoded_value >> 8);
+                    u4_level_first_byte = (u4_decoded_value & 0x0FF);
+                    if(u4_level_first_byte & 0x7F)
+                    {
+                        /* 14-bit FLC: sign extend the 8 bit level */
+                        u4_level = (u4_level_first_byte -
+                                        ((u4_level_first_byte & 0x80) << 1));
+                    }
+                    else
+                    {
+                        /* 22-bit FLC: second byte, minus 256 when the first
+                         * byte was '10000000' */
+                        IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,8)
+                        u4_level = u4_bits;
+                        u4_level = (u4_level - (u4_level_first_byte << 1));
+                    }
+                }
+            }
+
+            /* Skip the zero run and reject the block before storing anything
+             * if the coefficient would land outside the 8x8 block */
+            u4_numCoeffs += u4_run;
+            if (u4_numCoeffs >= 64)
+            {
+                return IMPEG2D_MB_TEX_DECODE_ERR;
+            }
+
+            u4_pos = pu1_scan[u4_numCoeffs++];
+            pu1_pos[*pi4_num_coeffs] = u4_pos;
+            pi2_outAddr[*pi4_num_coeffs] = u4_level;
+            (*pi4_num_coeffs)++;
+
+            u4_nz_cols |= 1 << (u4_pos & 0x7);
+            u4_nz_rows |= 1 << (u4_pos >> 0x3);
+        }
+
+        /* Consume the end-of-block code */
+        IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,u4_sym_len)
+    }
+
+    PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)
+
+    ps_dec->u4_non_zero_cols  = u4_nz_cols;
+    ps_dec->u4_non_zero_rows  = u4_nz_rows;
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+
+
+/******************************************************************************
+*
+*  Function Name   : impeg2d_inv_quant_mpeg1
+*
+*  Description     : Inverse quantizes the output of VLD
+*
+*  Inputs          :
+*  blk                - Block to be inverse quantized
+*  weighting_matrix   - Matrix to be used in inverse quant
+*  intra_dc_precision - Precision reqd to scale intra DC value
+*  quant_scale        - Quantization scale for inverse quant
+*  intra_flag         - Intra or Not
+*
+*  Globals         : None
+*
+*  Processing      : Implements the inverse quantize equation
+*
+*  Outputs         : Inverse quantized values in the block
+*
+*  Returns         : None
+*
+*  Issues          : None
+*
+*  Revision History:
+*
+*         DD MM YYYY   Author(s)       Changes
+*         05 09 2005   Harish M        First Version
+*
+******************************************************************************/
+UWORD8 impeg2d_inv_quant_mpeg1(WORD16 *pi2_blk,
+                             UWORD8 *pu1_weighting_matrix,
+                             UWORD8 u1_quant_scale,
+                             WORD32 u4_intra_flag,
+                             WORD32 i4_num_coeffs,
+                             WORD16 *pi2_coeffs,
+                             UWORD8 
*pu1_pos, + const UWORD8 *pu1_scan, + UWORD16 *pu2_def_dc_pred, + UWORD16 u2_intra_dc_precision) +{ + UWORD16 i4_pos; + + WORD32 i4_iter; + + /* Inverse Quantize the predicted DC value for intra MB*/ + if(u4_intra_flag == 1) + { + /**************************************************************************/ + /* Decode the DC coefficient in case of Intra block and also update */ + /* DC predictor value of the corresponding color component */ + /**************************************************************************/ + { + pi2_coeffs[0] += *pu2_def_dc_pred; + *pu2_def_dc_pred = pi2_coeffs[0]; + pi2_coeffs[0] <<= (3 - u2_intra_dc_precision); + pi2_coeffs[0] = CLIP_S12(pi2_coeffs[0]); + } + + pi2_blk[pu1_scan[0]] = pi2_coeffs[0]; + } + /************************************************************************/ + /* Inverse quantization of other DCT coefficients */ + /************************************************************************/ + for(i4_iter = u4_intra_flag; i4_iter < i4_num_coeffs; i4_iter++) + { + + WORD16 sign; + WORD32 temp, temp1; + + /* Position is the inverse scan of the index stored */ + i4_pos = pu1_pos[i4_iter]; + pi2_blk[i4_pos] = pi2_coeffs[i4_iter]; + + sign = SIGN(pi2_blk[i4_pos]); + temp = ABS(pi2_blk[i4_pos] << 1); + + /* pi2_coeffs has only non-zero elements. So no need to check + * if the coeff is non-zero. + */ + temp = temp + (1 * !u4_intra_flag); + + temp = temp * pu1_weighting_matrix[i4_pos] * u1_quant_scale; + + temp = temp >> 5; + + temp1 = temp | 1; + + temp1 = (temp1 > temp) ? 
(temp1 - temp) : (temp - temp1); + + temp = temp - temp1; + + if(temp < 0) + { + temp = 0; + } + + temp = temp * sign; + + temp = CLIP_S12(temp); + + pi2_blk[i4_pos] = temp; + } + + /*return value is used in the case of mpeg2 for mismatch control*/ + return (0); +} /* End of inv_quant() */ + + + +/*****************************************************************************/ +/* */ +/* Function Name : impeg2d_inv_quant_mpeg2 */ +/* */ +/* Description : Inverse quantizes the output of VLD */ +/* */ +/* Inputs : */ +/* blk, - Block to be inverse quantized */ +/* weighting_matrix - Matrix to be used in inverse quant */ +/* intra_dc_precision- Precision reqd to scale intra DC value */ +/* quant_scale - Quanization scale for inverse quant */ +/* intra_flag - Intra or Not */ +/* */ +/* Globals : None */ +/* */ +/* Processing : Implements the inverse quantize equation */ +/* */ +/* Outputs : Inverse quantized values in the block */ +/* */ +/* Returns : None */ +/* */ +/* Issues : None */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 05 09 2005 Harish M First Version */ +/* */ +/*****************************************************************************/ +UWORD8 impeg2d_inv_quant_mpeg2(WORD16 *pi2_blk, + UWORD8 *pu1_weighting_matrix, + UWORD8 u1_quant_scale, + WORD32 u4_intra_flag, + WORD32 i4_num_coeffs, + WORD16 *pi2_coeffs, + UWORD8 *pu1_pos, + const UWORD8 *pu1_scan, + UWORD16 *pu2_def_dc_pred, + UWORD16 u2_intra_dc_precision) +{ + + WORD32 i4_pos; + /* Used for Mismatch control */ + UWORD32 sum; + + WORD32 i4_iter; + + sum = 0; + + /* Inverse Quantize the predicted DC value for intra MB*/ + if(u4_intra_flag == 1) + { + /**************************************************************************/ + /* Decode the DC coefficient in case of Intra block and also update */ + /* DC predictor value of the corresponding color component */ + /**************************************************************************/ + { + pi2_coeffs[0] += 
*pu2_def_dc_pred; + *pu2_def_dc_pred = pi2_coeffs[0]; + pi2_coeffs[0] <<= (3 - u2_intra_dc_precision); + pi2_coeffs[0] = CLIP_S12(pi2_coeffs[0]); + } + + pi2_blk[pu1_scan[0]] = pi2_coeffs[0]; + sum = pi2_blk[0]; + } + + /************************************************************************/ + /* Inverse quantization of other DCT coefficients */ + /************************************************************************/ + for(i4_iter = u4_intra_flag; i4_iter < i4_num_coeffs; i4_iter++) + { + WORD16 sign; + WORD32 temp; + /* Position is the inverse scan of the index stored */ + i4_pos = pu1_pos[i4_iter]; + pi2_blk[i4_pos] = pi2_coeffs[i4_iter]; + + sign = SIGN(pi2_blk[i4_pos]); + temp = ABS(pi2_blk[i4_pos] << 1); + temp = temp + (1 * !u4_intra_flag); + temp = temp * pu1_weighting_matrix[i4_pos] * u1_quant_scale; + + temp = temp >> 5; + + temp = temp * sign; + + temp = CLIP_S12(temp); + + pi2_blk[i4_pos] = temp; + + sum += temp; + } + return (sum ^ 1); +} /* End of inv_quant() */ diff --git a/decoder/impeg2d_vld.h b/decoder/impeg2d_vld.h new file mode 100644 index 0000000..f52da96 --- /dev/null +++ b/decoder/impeg2d_vld.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2D_VLD_H__ +#define __IMPEG2D_VLD_H__ + + +WORD16 impeg2d_dec_vld_symbol(stream_t *stream,const WORD16 codeTable[][2], + UWORD16 maxLen); +WORD16 impeg2d_fast_dec_vld_symbol(stream_t *stream, + const WORD16 codeTable[][2], + const UWORD16 indexTable[][2], + UWORD16 maxLen); +IMPEG2D_ERROR_CODES_T impeg2d_vld_decode(dec_state_t *dec, WORD16 *outAddr, /*!< Address where decoded symbols will be stored */ + const UWORD8 *scan, /*!< Scan table to be used */ + UWORD8 *pu1_pos, /*!< Presumably receives the block position of each decoded coefficient, not a scan table - TODO confirm against the definition */ + UWORD16 intraFlag, /*!< Intra Macroblock or not */ + UWORD16 chromaFlag, /*!< Chroma Block or not */ + UWORD16 dPicture, /*!< D Picture or not */ + UWORD16 intraVlcFormat, /*!< Intra VLC format */ + UWORD16 mpeg2, /*!< MPEG-2 or not */ + WORD32 *pi4_num_coeffs /*!< Returns the number of coeffs in block */ + ); + +pf_vld_inv_quant_t impeg2d_vld_inv_quant_mpeg1; +pf_vld_inv_quant_t impeg2d_vld_inv_quant_mpeg2; + + +pf_inv_quant_t impeg2d_inv_quant_mpeg1; +pf_inv_quant_t impeg2d_inv_quant_mpeg2; + + +#endif /* #ifndef __IMPEG2D_VLD_H__ */ diff --git a/decoder/impeg2d_vld_tables.c b/decoder/impeg2d_vld_tables.c new file mode 100644 index 0000000..dba05ec --- /dev/null +++ b/decoder/impeg2d_vld_tables.c @@ -0,0 +1,465 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include "iv_datatypedef.h" +#include "impeg2d_vld_tables.h" + +/* Table to be used for decoding the MB increment value */ +const WORD16 gai2_impeg2d_mb_addr_incr[][2] = +{ + {1,-33},{3,2},{-31,-32},{5,4},{-29,-30},{7,6},{-27,-28},{13,8},{10,9}, + {-25,-26},{12,11},{-23,-24},{-21,-22},{25,14},{16,15},{-19,-20},{20,17}, + {19,18},{-17,-18},{-15,-16},{22,21},{-13,-14},{24,23},{-11,-12},{-9,-10}, + {34,26},{0,27},{31,28},{30,29},{-7,-8},{-5,-6},{33,32},{-3,-4},{-1,-2}, + {0,35},{36,0},{37,0},{-34,0} +}; + +/* Table to be used for decoding the MB type in case of P Pictures */ +const UWORD16 gau2_impeg2d_p_mb_type[] = +{ + 0x0100, 0x0611, 0x0512, 0x0512, 0x051a, 0x051a, 0x0501, 0x0501, 0x0308, 0x0308, + 0x0308, 0x0308, 0x0308, 0x0308, 0x0308, 0x0308, 0x0202, 0x0202, 0x0202, 0x0202, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, + 0x0202, 0x0202, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, + 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, + 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, + 0x010a, 0x010a, 0x010a, 0x010a +}; +/* Table to be used for decoding the MB type in case of B Pictures */ +const UWORD16 gau2_impeg2d_b_mb_type[] = +{ + 0x0100, 0x0611, 0x0616, 0x061a, 0x053e, 0x053e, 0x0501, 0x0501, 0x0408, 0x0408, + 0x0408, 0x0408, 0x040a, 0x040a, 0x040a, 0x040a, 
0x0304, 0x0304, 0x0304, 0x0304, + 0x0304, 0x0304, 0x0304, 0x0304, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, + 0x0306, 0x0306, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, + 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022e, 0x022e, + 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, + 0x022e, 0x022e, 0x022e, 0x022e +}; +/* Table to be used for decoding dmvector[t] */ +const WORD16 gai2_impeg2d_dec_mv[] = +{ + 1,-1 +}; + +/* Tables used for motion code decode */ +const UWORD16 gau2_impeg2d_mv_code[] = +{ + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0f0a, 0x0e0a, 0x0d0a, 0x0c0a, 0x0b0a, 0x0a0a, 0x0909, 0x0909, + 0x0809, 0x0809, 0x0709, 0x0709, 0x0607, 0x0607, 0x0607, 0x0607, 0x0607, 0x0607, + 0x0607, 0x0607, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, + 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0306, 0x0306, + 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, + 0x0306, 0x0306, 0x0306, 0x0306, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, + 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 
0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, + 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 
0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, + 0x0002, 0x0002 +}; +/* Tables used for cbp decode */ +const UWORD16 gau2_impeg2d_cbp_code[] = +{ + 0x0100, 0x0100, 0x0927, 0x091b, 0x093b, 0x0937, 0x092f, 0x091f, 0x083a, 0x083a, + 0x0836, 0x0836, 0x082e, 0x082e, 0x081e, 0x081e, 0x0839, 0x0839, 0x0835, 0x0835, + 0x082d, 0x082d, 0x081d, 0x081d, 0x0826, 0x0826, 0x081a, 0x081a, 0x0825, 0x0825, + 0x0819, 0x0819, 0x082b, 0x082b, 0x0817, 0x0817, 0x0833, 0x0833, 0x080f, 0x080f, + 0x082a, 0x082a, 0x0816, 0x0816, 0x0832, 0x0832, 0x080e, 0x080e, 0x0829, 0x0829, + 0x0815, 0x0815, 0x0831, 0x0831, 0x080d, 0x080d, 0x0823, 0x0823, 0x0813, 0x0813, + 0x080b, 0x080b, 0x0807, 0x0807, 0x0722, 0x0722, 0x0722, 0x0722, 0x0712, 0x0712, + 0x0712, 0x0712, 0x070a, 0x070a, 0x070a, 0x070a, 0x0706, 0x0706, 0x0706, 0x0706, + 0x0721, 0x0721, 0x0721, 0x0721, 0x0711, 0x0711, 0x0711, 0x0711, 0x0709, 0x0709, + 0x0709, 0x0709, 0x0705, 0x0705, 0x0705, 0x0705, 0x063f, 0x063f, 0x063f, 0x063f, + 0x063f, 0x063f, 0x063f, 0x063f, 0x0603, 0x0603, 0x0603, 0x0603, 0x0603, 0x0603, + 0x0603, 0x0603, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, + 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x053e, 0x053e, + 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 
0x053e, 0x053e, 0x053e, + 0x053e, 0x053e, 0x053e, 0x053e, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, + 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, + 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, + 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x0501, 0x0501, 0x0501, 0x0501, + 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, + 0x0501, 0x0501, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, + 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0534, 0x0534, + 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, + 0x0534, 0x0534, 0x0534, 0x0534, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, + 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, + 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, + 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x0528, 0x0528, 0x0528, 0x0528, + 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, + 0x0528, 0x0528, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, + 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0530, 0x0530, + 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, + 0x0530, 0x0530, 0x0530, 0x0530, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, + 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, + 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, + 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, + 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, + 0x0420, 0x0420, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, + 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, + 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, + 
0x0410, 0x0410, 0x0410, 0x0410, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, + 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, + 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, + 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, + 0x033c, 0x033c +}; + + +/* Table B.14 DCT Coefficients Table zero */ +const UWORD16 gau2_impeg2d_dct_coeff_zero[] = +{ + 2640,2608,2576,2544,12400,32848,30800,28752,26704,24656,22608,63536,61488, + 59440,57392,55344,1295,1263,1231,1199,1167,1135,1103,1071,1039,2511,2479, + 2447,2415,2383,2351,2319,1006,974,942,910,878,846,814,782,750,718,686,654, + 622,590,558,526,20557,18509,10349,6285,4269,2285,2253,493,461,429,397, + 53293,51245,49197,47149,45101,364,16460,8300,332,4236,14412,43052,41004, + 300,38956,36908,2220,6252,268,12364,34860,32810,10314,234,4202,2186,30762, + 28714,8266,ESCAPE_CODE,4167,18471,135,16423,14374,12326,2118,10278,26664, + 200,24616,22568,6216,2152,168,20520,101,101,101,101,101,101,101,101,8229, + 8229,8229,8229,8229,8229,8229,8229,6181,6181,6181,6181,6181,6181,6181,6181, + 68,4132,2083,2083,END_OF_BLOCK,34 +}; + +/* tab Zero b.14 for 1-9 bits*/ +const UWORD16 gau2_impeg2d_tab_zero_1_9[] = +{ + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, + 0x0428, 0x0428, 0xfc28, 0xfc28, 0x0298, 0x0298, 0xfe98, 0xfe98, + 0x0808, 0x0808, 0xf808, 0xf808, 0x0288, 0x0288, 0xfe88, 0xfe88, + 0x0277, 0x0277, 0x0277, 0x0277, 0xfe77, 0xfe77, 0xfe77, 0xfe77, + 0x0267, 0x0267, 0x0267, 0x0267, 0xfe67, 0xfe67, 0xfe67, 0xfe67, + 0x0417, 0x0417, 0x0417, 0x0417, 0xfc17, 0xfc17, 0xfc17, 0xfc17, + 0x0257, 0x0257, 0x0257, 0x0257, 0xfe57, 0xfe57, 0xfe57, 0xfe57, + 0x02d9, 0xfed9, 0x0c09, 0xf409, 0x02c9, 0xfec9, 0x02b9, 0xfeb9, + 0x0439, 0xfc39, 0x0619, 0xfa19, 0x0a09, 0xf609, 0x02a9, 0xfea9, + 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, + 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, + 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, + 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, + 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, + 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, + 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, + 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, + 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, + 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, + 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, + 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, + 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, + 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 
0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 
0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03 +}; + +const UWORD16 gau2_impeg2d_tab_zero_10_16[] = +{ + /*Six leading zeros*/ + 0x0300, 0x0300, 0xff00, 0xff00, 0x0450, 0x0450, 0xfc50, 0xfc50, + 0x0e00, 0x0e00, 0xf200, 0xf200, 0x0620, 0x0620, 0xfa20, 0xfa20, + 0x0810, 0x0810, 0xf810, 0xf810, 0x02f0, 0x02f0, 0xfef0, 0xfef0, + 0x02e0, 0x02e0, 0xfee0, 0xfee0, 0x0440, 0x0440, 0xfc40, 0xfc40, + /*Seven leading zeros*/ + 0x1602, 0xea02, 0x0482, 0xfc82, 0x0642, 0xfa42, 0x1402, 0xec02, + 0x0822, 0xf822, 0x0472, 0xfc72, 0x0352, 0xff52, 0x0342, 0xff42, + 0x1202, 0xee02, 0x0332, 0xff32, 0x0322, 0xff22, 0x0a12, 0xf612, + 0x0632, 0xfa32, 0x1002, 0xf002, 0x0462, 0xfc62, 0x0312, 0xff12, + /*Eight leading zeros*/ + 0x04a3, 0xfca3, 0x0493, 0xfc93, 0x0653, 0xfa53, 0x0833, 0xf833, + 0x0a23, 0xf623, 0x0e13, 0xf213, 0x0c13, 0xf413, 0x1e03, 0xe203, + 0x1c03, 0xe403, 0x1a03, 0xe603, 0x1803, 0xe803, 0x03a3, 0xffa3, + 0x0393, 0xff93, 0x0383, 0xff83, 0x0373, 0xff73, 0x0363, 0xff63, + /*Nine leading zeros*/ + 0x3e04, 0xc204, 0x3c04, 0xc404, 0x3a04, 0xc604, 0x3804, 0xc804, + 0x3604, 0xca04, 0x3404, 0xcc04, 0x3204, 0xce04, 0x3004, 0xd004, + 0x2e04, 0xd204, 0x2c04, 0xd404, 0x2a04, 0xd604, 0x2804, 0xd804, + 0x2604, 0xda04, 0x2404, 0xdc04, 0x2204, 0xde04, 0x2004, 0xe004, + /*Ten leading zeros*/ + 0x5005, 0xb005, 0x4e05, 0xb205, 0x4c05, 0xb405, 0x4a05, 0xb605, + 0x4805, 0xb805, 0x4605, 0xba05, 0x4405, 0xbc05, 0x4205, 0xbe05, + 0x4005, 0xc005, 0x1c15, 0xe415, 0x1a15, 0xe615, 0x1815, 0xe815, + 0x1615, 0xea15, 0x1415, 0xec15, 0x1215, 0xee15, 0x1015, 0xf015, + /*Eleven leading zeros*/ + 0x2416, 0xdc16, 0x2216, 0xde16, 0x2016, 0xe016, 0x1e16, 0xe216, + 0x0666, 0xfa66, 0x0506, 0xfd06, 0x04f6, 0xfcf6, 0x04e6, 0xfce6, + 0x04d6, 0xfcd6, 0x04c6, 0xfcc6, 0x04b6, 0xfcb6, 0x03f6, 0xfff6, + 0x03e6, 0xffe6, 0x03d6, 0xffd6, 0x03c6, 
0xffc6, 0x03b6, 0xffb6 + +}; + +/* Table B.15 DCT Coefficients Table one */ +const UWORD16 gau2_impeg2d_dct_coeff_one[] = +{ + 2640,2608,2576,2544,12400,32848,30800,28752,26704,24656,22608,63536,61488, + 59440,57392,55344,1295,1263,1231,1199,1167,1135,1103,1071,1039,2511,2479, + 2447,2415,2383,2351,2319,1006,974,942,910,878,846,814,782,750,718,686,654, + 622,590,558,526,20557,18509,10349,6285,4269,2285,2253,0,0,0,0,53293,51245, + 49197,47149,45101,16460,8300,0,0,14412,43052,41004,0,38956,36908,0,6252, + 0,12364,34860,10313,10313,28713,28713,4234,32810,30761,30761,ESCAPE_CODE, + 14375,16423,12327,4167,230,198,8230,10278,2216,22568,360,328,26664,24616, + 6216,2184,4133,4133,4133,4133,4133,4133,4133,4133,2117,2117,2117,2117,2117, + 2117,2117,2117,6181,6181,6181,6181,6181,6181,6181,6181,2083,2083, + END_OF_BLOCK,100,18471,18471,2151,2151,20519,20519,263,263,295,295,392,424, + 4200,8264,456,488,34,34,34,34,34,34,34,34,67,67,67,67,133,165 +}; + +/* tab One b.15 for 1-9 bits*/ +const UWORD16 gau2_impeg2d_tab_one_1_9[] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, + 0x0278, 0x0278, 0xfe78, 0xfe78, 0x0288, 0x0288, 0xfe88, 0xfe88, + 0x0268, 0x0268, 0xfe68, 0xfe68, 0x0428, 0x0428, 0xfc28, 0xfc28, + 0x0e07, 0x0e07, 0x0e07, 0x0e07, 0xf207, 0xf207, 0xf207, 0xf207, + 0x0c07, 0x0c07, 0x0c07, 0x0c07, 0xf407, 0xf407, 0xf407, 0xf407, + 0x0247, 0x0247, 0x0247, 0x0247, 0xfe47, 0xfe47, 0xfe47, 0xfe47, + 0x0257, 0x0257, 0x0257, 0x0257, 0xfe57, 0xfe57, 0xfe57, 0xfe57, + 0x0a19, 0xf619, 0x02b9, 0xfeb9, 0x1609, 0xea09, 0x1409, 0xec09, + 0x02d9, 0xfed9, 0x02c9, 0xfec9, 0x0439, 0xfc39, 0x0819, 0xf819, + 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, + 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, + 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, + 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, + 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 
0x0236, 0x0236, 0x0236, + 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, + 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, + 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, + 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 
0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, + 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, + 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, + 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, + 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, + 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, + 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, + 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, + 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, + 0x0298, 0x0298, 0xfe98, 0xfe98, 0x0618, 0x0618, 0xfa18, 0xfa18, + 0x02a8, 0x02a8, 0xfea8, 0xfea8, 0x1008, 0x1008, 0xf008, 0xf008, + 0x1208, 0x1208, 0xee08, 0xee08, 0x1809, 0xe809, 0x1a09, 0xe609, + 0x0629, 0xfa29, 0x0449, 0xfc49, 0x1c09, 0xe409, 0x1e09, 0xe209 +}; + +const UWORD16 gau2_impeg2d_tab_one_10_16[] = +{ + /*Six leading zeros*/ + 0x0458, 0x0458, 0x0458, 0x0458, 0xfc58, 0xfc58, 0xfc58, 0xfc58, + 0x02e8, 0x02e8, 0x02e8, 0x02e8, 0xfee8, 0xfee8, 0xfee8, 0xfee8, + 0x0829, 0x0829, 0xf829, 0xf829, 0x0309, 0x0309, 0xff09, 0xff09, + 0x02f8, 0x02f8, 0x02f8, 0x02f8, 0xfef8, 0xfef8, 0xfef8, 0xfef8, + /*Seven leading zeros*/ + 0x000b, 0x000b, 0x048b, 0xfc8b, 0x064b, 0xfa4b, 0x000b, 0x000b, + 0x000b, 0x000b, 0x047b, 0xfc7b, 0x035b, 0xff5b, 0x034b, 0xff4b, + 0x000b, 0x000b, 0x033b, 0xff3b, 0x032b, 0xff2b, 0x000b, 0x000b, + 0x063b, 0xfa3b, 0x000b, 0x000b, 0x046b, 0xfc6b, 0x031b, 0xff1b, + /*Eight leading zeros*/ + 0x04ac, 0xfcac, 0x049c, 0xfc9c, 0x065c, 0xfa5c, 0x083c, 0xf83c, + 0x0a2c, 
0xf62c, 0x0e1c, 0xf21c, 0x0c1c, 0xf41c, 0x000c, 0x000c, + 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x03ac, 0xffac, + 0x039c, 0xff9c, 0x038c, 0xff8c, 0x037c, 0xff7c, 0x036c, 0xff6c, + /*Nine leading zeros*/ + 0x3e0d, 0xc20d, 0x3c0d, 0xc40d, 0x3a0d, 0xc60d, 0x380d, 0xc80d, + 0x360d, 0xca0d, 0x340d, 0xcc0d, 0x320d, 0xce0d, 0x300d, 0xd00d, + 0x2e0d, 0xd20d, 0x2c0d, 0xd40d, 0x2a0d, 0xd60d, 0x280d, 0xd80d, + 0x260d, 0xda0d, 0x240d, 0xdc0d, 0x220d, 0xde0d, 0x200d, 0xe00d, + /*Ten leading zeros*/ + 0x500e, 0xb00e, 0x4e0e, 0xb20e, 0x4c0e, 0xb40e, 0x4a0e, 0xb60e, + 0x480e, 0xb80e, 0x460e, 0xba0e, 0x440e, 0xbc0e, 0x420e, 0xbe0e, + 0x400e, 0xc00e, 0x1c1e, 0xe41e, 0x1a1e, 0xe61e, 0x181e, 0xe81e, + 0x161e, 0xea1e, 0x141e, 0xec1e, 0x121e, 0xee1e, 0x101e, 0xf01e, + /*Eleven leading zeros*/ + 0x241f, 0xdc1f, 0x221f, 0xde1f, 0x201f, 0xe01f, 0x1e1f, 0xe21f, + 0x066f, 0xfa6f, 0x050f, 0xfd0f, 0x04ff, 0xfcff, 0x04ef, 0xfcef, + 0x04df, 0xfcdf, 0x04cf, 0xfccf, 0x04bf, 0xfcbf, 0x03ff, 0xffff, + 0x03ef, 0xffef, 0x03df, 0xffdf, 0x03cf, 0xffcf, 0x03bf, 0xffbf +}; + +/* Depending upon the various groups identified in the Ac Vld Coeffs the +following tables are used to index into the DCT Coefficients Tables Zero and +One defined above */ +const UWORD16 gau2_impeg2d_offset_zero[] = +{ + 0, + 1369, + 1434,1434, + 1048,1048,1048,1048, + 2012,2012,2012,2012,2012,2012,2012,2012, + 2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110, + 2110,2110, + 258, + 515,515, + 772,772,772,772, + 1158,1158,1158,1158,1158,1158,1158,1158, + 1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402, + 1402,1402, + 0, + 1 +}; + +const UWORD16 gau2_impeg2d_offset_one[] = +{ + 132, + 1386, + 1353,1353, + 1418,1418,1418,1418, + 1032,1032,1032,1032,1032,1032,1032,1032, + 1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996, + 1996,1996, + 1, + 258,258, + 515,515,515,515, + 756,756,756,756,756,756,756,756, + 
1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142, + 1142,1142, + 0 +}; + + +/* Table to be used for decoding dct_dc_size_luminance */ +const WORD16 gai2_impeg2d_dct_dc_size[][11][2] = +{ + { + {3,1},{2,4},{-12,-9},{-11,-10},{-8,5},{-7,6},{-6,7},{-5,8},{-4,9},{-3,10},{-2,-1} + }, + { + {1,2},{-12,-11},{-10,3},{-9,4},{-8,5},{-7,6},{-6,7},{-5,8},{-4,9},{-3,10},{-2,-1} + } +}; + + diff --git a/decoder/impeg2d_vld_tables.h b/decoder/impeg2d_vld_tables.h new file mode 100644 index 0000000..75805af --- /dev/null +++ b/decoder/impeg2d_vld_tables.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +#ifndef __IMPEG2D_VLD_TABLES_H__ +#define __IMPEG2D_VLD_TABLES_H__ + + +#define MB_ADDR_INCR_OFFSET 34 +#define MB_ADDR_INCR_LEN 11 +#define MB_TYPE_LEN 6 +#define MV_CODE_LEN 11 +#define MB_CBP_LEN 9 + + + +#define MB_BIDRECT 0x20 +#define MB_QUANT 0x10 +#define MB_MV_FORW 0x8 +#define MB_MV_BACK 0x4 +#define MB_PATTERN 0x2 +#define MB_TYPE_INTRA 0x1 +#define MB_FORW_OR_BACK (MB_MV_FORW | MB_MV_BACK) +#define MB_CODED (MB_TYPE_INTRA | MB_PATTERN) + + +#define MPEG2_MB_ADDR_INCR_OFFSET 34 +#define MPEG2_INTRA_MBTYPE_OFFSET 69 +#define MPEG2_INTER_MBTYPE_OFFSET 105 +#define MPEG2_BVOP_MBTYPE_OFFSET 125 +#define MPEG2_DCT_DC_SIZE_OFFSET 12 +#define MPEG2_CBP_OFFSET 64 +#define MPEG2_MOTION_CODE_OFFSET 17 +#define MPEG2_DMV_OFFSET 2 + +#define MPEG2_AC_COEFF_MAX_LEN 16 +#define MB_ADDR_INCR_LEN 11 +#define MPEG2_INTRA_MBTYPE_LEN 2 +#define MPEG2_INTER_MBTYPE_LEN 6 + +#define MPEG2_DCT_DC_SIZE_LEN 9 +#define MPEG2_DCT_DC_LUMA_SIZE_LEN 9 +#define MPEG2_DCT_DC_CHROMA_SIZE_LEN 10 +#define MPEG2_CBP_LEN 9 +#define MPEG2_MOTION_CODE_LEN 11 +#define MPEG2_DMV_LEN 2 + +#define END_OF_BLOCK 0x01 +#define ESCAPE_CODE 0x06 + +/* Table to be used for decoding the MB increment value */ +extern const WORD16 gai2_impeg2d_mb_addr_incr[][2]; +extern const WORD16 gai2_impeg2d_dct_dc_size[][11][2]; + +extern const UWORD16 gau2_impeg2d_dct_coeff_zero[]; +extern const UWORD16 gau2_impeg2d_dct_coeff_one[]; +extern const UWORD16 gau2_impeg2d_offset_zero[]; +extern const UWORD16 gau2_impeg2d_offset_one[]; + +extern const UWORD16 gau2_impeg2d_tab_zero_1_9[]; +extern const UWORD16 gau2_impeg2d_tab_one_1_9[]; +extern const UWORD16 gau2_impeg2d_tab_zero_10_16[]; +extern const UWORD16 gau2_impeg2d_tab_one_10_16[]; + +extern const UWORD16 gau2_impeg2d_p_mb_type[]; +extern const UWORD16 gau2_impeg2d_b_mb_type[]; +extern const UWORD16 gau2_impeg2d_mv_code[]; +extern const WORD16 gai2_impeg2d_dec_mv[4]; +extern const UWORD16 gau2_impeg2d_cbp_code[]; + + +#endif /* 
__IMPEG2D_VLD_TABLES_H__ */ + diff --git a/decoder/ivd.h b/decoder/ivd.h new file mode 100644 index 0000000..abc6604 --- /dev/null +++ b/decoder/ivd.h @@ -0,0 +1,948 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ivd.h +* +* @brief +* This file contains all the necessary structure and enumeration +* definitions needed for the Application Program Interface(API) of the +* Ittiam Video Decoders +* +* @author +* 100239(RCY) +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef _IVD_H +#define _IVD_H + +/*****************************************************************************/ +/* Constant Macros */ +/*****************************************************************************/ +#define IVD_VIDDEC_MAX_IO_BUFFERS 64 +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Enums */ +/*****************************************************************************/ + +/* IVD_ARCH_T: Architecture Enumeration */ +typedef enum +{ + ARCH_NA = 0x7FFFFFFF, + ARCH_ARM_NONEON = 0x0, + ARCH_ARM_A9Q, + ARCH_ARM_A9A, + ARCH_ARM_A9, + ARCH_ARM_A7, + ARCH_ARM_A5, + ARCH_ARM_A15, + ARCH_ARM_NEONINTR, + ARCH_ARMV8_GENERIC, + ARCH_X86_GENERIC = 0x100, + ARCH_X86_SSSE3, + ARCH_X86_SSE42, + ARCH_X86_AVX2, + ARCH_MIPS_GENERIC = 0x200, + ARCH_MIPS_32 +}IVD_ARCH_T; + +/* IVD_SOC_T: SOC Enumeration */ +typedef enum +{ + SOC_NA = 0x7FFFFFFF, + SOC_GENERIC = 0x0, + SOC_HISI_37X = 0x100, +}IVD_SOC_T; + +/* IVD_FRAME_SKIP_MODE_T:Skip mode Enumeration */ + +typedef enum { + IVD_SKIP_NONE = 0x7FFFFFFF, + IVD_SKIP_P = 0x1, + IVD_SKIP_B = 0x2, + IVD_SKIP_I = 0x3, + IVD_SKIP_IP = 0x4, + IVD_SKIP_IB = 0x5, + IVD_SKIP_PB = 0x6, + IVD_SKIP_IPB = 0x7, + IVD_SKIP_IDR = 0x8, + IVD_SKIP_DEFAULT = IVD_SKIP_NONE, +}IVD_FRAME_SKIP_MODE_T; + +/* IVD_VIDEO_DECODE_MODE_T: Set decoder to decode either frame worth of data */ +/* or only 
header worth of data */ + +typedef enum { + IVD_DECODE_MODE_NA = 0x7FFFFFFF, + + /* This enables the codec to process all decodable units */ + IVD_DECODE_FRAME = 0x0, + + /* This enables the codec to decode header only */ + IVD_DECODE_HEADER = 0x1, + + + +}IVD_VIDEO_DECODE_MODE_T; + + +/* IVD_DISPLAY_FRAME_OUT_MODE_T: Video Display Frame Output Mode */ + +typedef enum { + + IVD_DISPLAY_ORDER_NA = 0x7FFFFFFF, + /* To set codec to fill output buffers in display order */ + IVD_DISPLAY_FRAME_OUT = 0x0, + + /* To set codec to fill output buffers in decode order */ + IVD_DECODE_FRAME_OUT = 0x1, +}IVD_DISPLAY_FRAME_OUT_MODE_T; + + +/* IVD_API_COMMAND_TYPE_T:API command type */ +typedef enum { + IVD_CMD_VIDEO_NA = 0x7FFFFFFF, + /* Numbering starts one past IV_CMD_DUMMY_ELEMENT, which is not defined in this header */ + IVD_CMD_VIDEO_CTL = IV_CMD_DUMMY_ELEMENT + 1, + IVD_CMD_VIDEO_DECODE, + IVD_CMD_GET_DISPLAY_FRAME, + IVD_CMD_REL_DISPLAY_FRAME, + IVD_CMD_SET_DISPLAY_FRAME +}IVD_API_COMMAND_TYPE_T; + +/* IVD_CONTROL_API_COMMAND_TYPE_T: Video Control API command type */ + +typedef enum { + IVD_CMD_NA = 0x7FFFFFFF, + IVD_CMD_CTL_GETPARAMS = 0x0, + IVD_CMD_CTL_SETPARAMS = 0x1, + IVD_CMD_CTL_RESET = 0x2, + IVD_CMD_CTL_SETDEFAULT = 0x3, + IVD_CMD_CTL_FLUSH = 0x4, + IVD_CMD_CTL_GETBUFINFO = 0x5, + IVD_CMD_CTL_GETVERSION = 0x6, + IVD_CMD_CTL_CODEC_SUBCMD_START = 0x7 +}IVD_CONTROL_API_COMMAND_TYPE_T; + + +/* IVD_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/ +/* code to the application. The first 8 bits starting from LSB have been */ +/* reserved for the codec to report internal error details. The rest of the */ +/* bits will be generic for all video decoders and each bit has an associated*/ +/* meaning as mentioned below. The unused bit fields are reserved for future */ +/* extensions and will be zero in the current implementation. */ +/* Each enumerator's value is the bit position named in its comment */ +/* (e.g. 0x8 for bit 8), not a bit mask. */ + +typedef enum { + /* Bit 8 - Applied concealment. */ + IVD_APPLIEDCONCEALMENT = 0x8, + /* Bit 9 - Insufficient input data. */ + IVD_INSUFFICIENTDATA = 0x9, + /* Bit 10 - Data problem/corruption. 
*/ + IVD_CORRUPTEDDATA = 0xa, + /* Bit 11 - Header problem/corruption. */ + IVD_CORRUPTEDHEADER = 0xb, + /* Bit 12 - Unsupported feature/parameter in input. */ + IVD_UNSUPPORTEDINPUT = 0xc, + /* Bit 13 - Unsupported input parameter or configuration. */ + IVD_UNSUPPORTEDPARAM = 0xd, + /* Bit 14 - Fatal error (stop the codec). If there is an */ + /* error and this bit is not set, the error is a recoverable one. */ + IVD_FATALERROR = 0xe, + /* Bit 15 - Invalid bitstream. Applies when Bitstream/YUV frame */ + /* buffer for encode/decode call is made with non-valid or zero u4_size */ + /* data */ + IVD_INVALID_BITSTREAM = 0xf, + /* Bit 16 */ + IVD_INCOMPLETE_BITSTREAM = 0x10, + IVD_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF +}IVD_ERROR_BITS_T; + + +/* IVD_ERROR_CODES_T: Error codes reported by the video decoder API */ +typedef enum { + IVD_ERROR_NONE = 0x0, + IVD_NUM_MEM_REC_FAILED = 0x1, + IVD_NUM_REC_NOT_SUFFICIENT = 0x2, + IVD_FILL_MEM_REC_FAILED = 0x3, + IVD_REQUESTED_WIDTH_NOT_SUPPPORTED = 0x4, + IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED = 0x5, + IVD_INIT_DEC_FAILED = 0x6, + IVD_INIT_DEC_NOT_SUFFICIENT = 0x7, + IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED = 0x8, + IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED = 0x9, + IVD_INIT_DEC_MEM_NOT_ALIGNED = 0xa, + IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED = 0xb, + IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT = 0xc, + IVD_GET_VERSION_DATABUFFER_SZ_INSUFFICIENT = 0xd, + IVD_BUFFER_SIZE_SET_TO_ZERO = 0xe, + IVD_UNEXPECTED_END_OF_STREAM = 0xf, + IVD_SEQUENCE_HEADER_NOT_DECODED = 0x10, + IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED = 0x11, + IVD_MAX_FRAME_LIMIT_REACHED = 0x12, + IVD_IP_API_STRUCT_SIZE_INCORRECT = 0x13, + IVD_OP_API_STRUCT_SIZE_INCORRECT = 0x14, + IVD_HANDLE_NULL = 0x15, + IVD_HANDLE_STRUCT_SIZE_INCORRECT = 0x16, + IVD_INVALID_HANDLE_NULL = 0x17, + IVD_INVALID_API_CMD = 0x18, + IVD_UNSUPPORTED_API_CMD = 0x19, + IVD_MEM_REC_STRUCT_SIZE_INCORRECT = 0x1a, + IVD_DISP_FRM_ZERO_OP_BUFS = 0x1b, + IVD_DISP_FRM_OP_BUF_NULL = 0x1c, + IVD_DISP_FRM_ZERO_OP_BUF_SIZE = 0x1d, + 
IVD_DEC_FRM_BS_BUF_NULL = 0x1e, + IVD_SET_CONFG_INVALID_DEC_MODE = 0x1f, + IVD_SET_CONFG_UNSUPPORTED_DISP_WIDTH = 0x20, + IVD_RESET_FAILED = 0x21, + IVD_INIT_DEC_MEM_REC_OVERLAP_ERR = 0x22, + IVD_INIT_DEC_MEM_REC_BASE_NULL = 0x23, + IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR = 0x24, + IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE = 0x25, + IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE = 0x26, + IVD_DEC_NUMBYTES_INV = 0x27, + IVD_DEC_REF_BUF_NULL = 0x28, + IVD_DEC_FRM_SKIPPED = 0x29, + IVD_RES_CHANGED = 0x2a, + IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS = 0x300, +}IVD_ERROR_CODES_T; + + +/*****************************************************************************/ +/* Structure */ +/*****************************************************************************/ +/* structure for passing output buffers to codec during get display buffer */ +/* call */ +typedef struct { + + /** + * number of output buffers + */ + UWORD32 u4_num_bufs; + + /** + * list of pointers to output buffers + */ + UWORD8 *pu1_bufs[IVD_VIDDEC_MAX_IO_BUFFERS]; + + /** + * sizes of each output buffer + */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + +}ivd_out_bufdesc_t; + +/*****************************************************************************/ +/* Initialize decoder */ +/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_INIT */ +/* NOTE(review): IVD_CMD_INIT is not an enumerator of IVD_API_COMMAND_TYPE_T */ +/* in this header - confirm the intended command name. */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * e_cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * number of memrecords which are allocated on request of codec through fill mem records + */ + UWORD32 u4_num_mem_rec; + /** + * maximum width for which codec should be initialized + */ + UWORD32 u4_frm_max_wd; + /** + * maximum height for which codec should be initialized + */ + UWORD32 u4_frm_max_ht; + /** + * format in which codec has to give out frame data for display + */ + IV_COLOR_FORMAT_T e_output_format; + /** + * pointer to memrecord array, which contains 
allocated resources + */ + iv_mem_rec_t *pv_mem_rec_location; +}ivd_init_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * u4_error_code + */ + UWORD32 u4_error_code; +}ivd_init_op_t; + + +/*****************************************************************************/ +/* Video Decode */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_DECODE */ + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * e_cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * u4_ts + */ + UWORD32 u4_ts; + + /** + * u4_num_Bytes + */ + UWORD32 u4_num_Bytes; + + /** + * pv_stream_buffer + */ + void *pv_stream_buffer; + + /** + * output buffer desc + */ + ivd_out_bufdesc_t s_out_buffer; + +}ivd_video_decode_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * u4_error_code + */ + UWORD32 u4_error_code; + + /** + * num_bytes_consumed + */ + UWORD32 u4_num_bytes_consumed; + + /** + * pic_wd + */ + UWORD32 u4_pic_wd; + + /** + * pic_ht + */ + UWORD32 u4_pic_ht; + + /** + * pic_type + */ + IV_PICTURE_CODING_TYPE_T e_pic_type; + + /** + * frame_decoded_flag + */ + UWORD32 u4_frame_decoded_flag; + + /** + * new_seq + */ + UWORD32 u4_new_seq; + + /** + * output_present + */ + UWORD32 u4_output_present; + + /** + * progressive_frame_flag + */ + UWORD32 u4_progressive_frame_flag; + + /** + * is_ref_flag + */ + UWORD32 u4_is_ref_flag; + + /** + * output_format + */ + IV_COLOR_FORMAT_T e_output_format; + + /** + * disp_frm_buf + */ + iv_yuv_buf_t s_disp_frm_buf; + + /** + * fld_type + */ + IV_FLD_TYPE_T e4_fld_type; + + /** + * ts + */ + UWORD32 u4_ts; + + /** + * disp_buf_id + */ + UWORD32 u4_disp_buf_id; +}ivd_video_decode_op_t; + + +/*****************************************************************************/ +/* Get Display Frame */ 
+/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_GET_DISPLAY_FRAME */ + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * e_cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * output buffer desc + */ + ivd_out_bufdesc_t s_out_buffer; + +}ivd_get_display_frame_ip_t; + + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error_code + */ + UWORD32 u4_error_code; + + /** + * progressive_frame_flag + */ + UWORD32 u4_progressive_frame_flag; + + /** + * pic_type + */ + IV_PICTURE_CODING_TYPE_T e_pic_type; + + /** + * is_ref_flag + */ + UWORD32 u4_is_ref_flag; + + /** + * output_format + */ + IV_COLOR_FORMAT_T e_output_format; + + /** + * disp_frm_buf + */ + iv_yuv_buf_t s_disp_frm_buf; + + /** + * fld_type + */ + IV_FLD_TYPE_T e4_fld_type; + + /** + * ts + */ + UWORD32 u4_ts; + + /** + * disp_buf_id + */ + UWORD32 u4_disp_buf_id; +}ivd_get_display_frame_op_t; + +/*****************************************************************************/ +/* Set Display Frame */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_SET_DISPLAY_FRAME */ + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * num_disp_bufs + */ + UWORD32 num_disp_bufs; + + /** + * output buffer desc + */ + ivd_out_bufdesc_t s_disp_buffer[IVD_VIDDEC_MAX_IO_BUFFERS]; + +}ivd_set_display_frame_ip_t; + + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; +}ivd_set_display_frame_op_t; + + +/*****************************************************************************/ +/* Release Display Frame */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd 
= IVD_CMD_REL_DISPLAY_FRAME */ + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * e_cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * disp_buf_id + */ + UWORD32 u4_disp_buf_id; +}ivd_rel_display_frame_ip_t; + + +typedef struct +{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; +}ivd_rel_display_frame_op_t; + +/*****************************************************************************/ +/* Video control Flush */ +/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_CTL_FLUSH */ + + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_flush_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; +}ivd_ctl_flush_op_t; + +/*****************************************************************************/ +/* Video control reset */ +/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_CTL_RESET */ + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_reset_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; +}ivd_ctl_reset_op_t; + + +/*****************************************************************************/ +/* Video control Set Params */ 
+/*****************************************************************************/ +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETPARAMS */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETDEFAULT */ + + + +typedef struct { + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** + * vid_dec_mode + */ + IVD_VIDEO_DECODE_MODE_T e_vid_dec_mode; + + /** + * disp_wd + */ + UWORD32 u4_disp_wd; + + /** + * frm_skip_mode + */ + IVD_FRAME_SKIP_MODE_T e_frm_skip_mode; + + /** + * frm_out_mode + */ + IVD_DISPLAY_FRAME_OUT_MODE_T e_frm_out_mode; +}ivd_ctl_set_config_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * u4_error_code + */ + UWORD32 u4_error_code; +}ivd_ctl_set_config_op_t; + +/*****************************************************************************/ +/* Video control:Get Buf Info */ +/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETBUFINFO */ + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * e_cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_getbufinfo_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; + + /** + * no of display buffer sets required by codec + */ + UWORD32 u4_num_disp_bufs; + + /** + * no of input buffers required for codec + */ + UWORD32 u4_min_num_in_bufs; + + /** + * no of output buffers required for codec + */ + UWORD32 u4_min_num_out_bufs; + + /** + * sizes of each input buffer required + */ + UWORD32 
u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + + /** + * sizes of each output buffer required + */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; +}ivd_ctl_getbufinfo_op_t; + + +/*****************************************************************************/ +/* Video control:Getstatus Call */ +/*****************************************************************************/ + + +/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETPARAMS */ + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; +}ivd_ctl_getstatus_ip_t; + + +typedef struct{ + + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; + + /** + * no of display buffer sets required by codec + */ + UWORD32 u4_num_disp_bufs; + + /** + * u4_pic_ht + */ + UWORD32 u4_pic_ht; + + /** + * u4_pic_wd + */ + UWORD32 u4_pic_wd; + + /** + * frame_rate + */ + UWORD32 u4_frame_rate; + + /** + * u4_bit_rate + */ + UWORD32 u4_bit_rate; + + /** + * content_type + */ + IV_CONTENT_TYPE_T e_content_type; + + /** + * output_chroma_format + */ + IV_COLOR_FORMAT_T e_output_chroma_format; + + /** + * no of input buffers required for codec + */ + UWORD32 u4_min_num_in_bufs; + + /** + * no of output buffers required for codec + */ + UWORD32 u4_min_num_out_bufs; + + /** + * sizes of each input buffer required + */ + UWORD32 u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; + + /** + * sizes of each output buffer required + */ + UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS]; +}ivd_ctl_getstatus_op_t; + + +/*****************************************************************************/ +/* Video control:Get Version Info */ +/*****************************************************************************/ + +/* IVD_API_COMMAND_TYPE_T::e_cmd = 
IVD_CMD_VIDEO_CTL */ +/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_GETVERSION */ + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * cmd + */ + IVD_API_COMMAND_TYPE_T e_cmd; + + /** + * sub_cmd + */ + IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd; + + /** + * pv_version_buffer + */ + void *pv_version_buffer; + + /** + * version_buffer_size + */ + UWORD32 u4_version_buffer_size; +}ivd_ctl_getversioninfo_ip_t; + + +typedef struct{ + /** + * u4_size of the structure + */ + UWORD32 u4_size; + + /** + * error code + */ + UWORD32 u4_error_code; +}ivd_ctl_getversioninfo_op_t; + +#endif /* _IVD_H */ + diff --git a/decoder/mips/impeg2d_function_selector.c b/decoder/mips/impeg2d_function_selector.c new file mode 100644 index 0000000..a72c1f9 --- /dev/null +++ b/decoder/mips/impeg2d_function_selector.c @@ -0,0 +1,83 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "iv_datatypedef.h" +#include "iv.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "ivd.h" +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_vld_tables.h" +#include "impeg2d_vld.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_debug.h" +#include "impeg2d_mc.h" + +void impeg2d_init_function_ptr_generic(void *pv_codec); + +/* Installs the decoder function pointers; this variant only forwards the */ +/* codec handle to the generic initialiser. */ +void impeg2d_init_function_ptr(void *pv_codec) +{ + impeg2d_init_function_ptr_generic(pv_codec); +} + +/* Records the processor architecture in the decoder state; this variant */ +/* always stores ARCH_NA. */ +void impeg2d_init_arch(void *pv_codec) +{ + dec_state_t *ps_dec_state = pv_codec; + + ps_dec_state->e_processor_arch = ARCH_NA; +} diff --git a/decoder/x86/impeg2d_function_selector.c b/decoder/x86/impeg2d_function_selector.c new file mode 100755 index 0000000..ddadb02 --- /dev/null +++ b/decoder/x86/impeg2d_function_selector.c @@ -0,0 +1,117 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" + +void impeg2d_init_function_ptr_generic(void *pv_codec); +void impeg2d_init_function_ptr_ssse3(void *pv_codec); +void impeg2d_init_function_ptr_sse42(void 
*pv_codec); +void impeg2d_init_function_ptr_avx2(void *pv_codec); + +/* + * Installs the decoder function pointers for the architecture stored in + * e_processor_arch. The generic initialiser always runs first; the + * initialiser for the selected architecture is then run after it. + * ARCH_X86_AVX2 uses the SSE4.2 initialiser when DISABLE_AVX2 is defined, + * and any value without its own case also gets the SSE4.2 initialiser. + */ +void impeg2d_init_function_ptr(void *pv_codec) +{ + dec_state_t *ps_codec = (dec_state_t *)pv_codec; + + impeg2d_init_function_ptr_generic(pv_codec); + switch(ps_codec->e_processor_arch) + { + case ARCH_X86_GENERIC: + impeg2d_init_function_ptr_generic(pv_codec); + break; + case ARCH_X86_SSSE3: + impeg2d_init_function_ptr_ssse3(pv_codec); + break; + case ARCH_X86_SSE42: + impeg2d_init_function_ptr_sse42(pv_codec); + break; + case ARCH_X86_AVX2: +#ifndef DISABLE_AVX2 + impeg2d_init_function_ptr_avx2(pv_codec); +#else + impeg2d_init_function_ptr_sse42(pv_codec); +#endif + break; + default: + impeg2d_init_function_ptr_sse42(pv_codec); + break; + } +} + +/* + * Chooses e_processor_arch at compile time. When DEFAULT_ARCH is defined it + * is compared with the D_ARCH_X86_* build macros and the matching ARCH_X86_* + * enumerator is stored (ARCH_X86_GENERIC if none matches); when it is not + * defined, ARCH_X86_SSE42 is stored. + */ +void impeg2d_init_arch(void *pv_codec) +{ + dec_state_t *ps_codec = (dec_state_t*) pv_codec; + +#ifdef DEFAULT_ARCH +#if DEFAULT_ARCH == D_ARCH_X86_SSE42 + ps_codec->e_processor_arch = ARCH_X86_SSE42; +#elif DEFAULT_ARCH == D_ARCH_X86_SSSE3 + ps_codec->e_processor_arch = ARCH_X86_SSSE3; +#elif DEFAULT_ARCH == D_ARCH_X86_AVX2 + /* Store the ARCH_X86_AVX2 enumerator, not the D_ARCH_X86_AVX2 build macro, */ + /* so the ARCH_X86_AVX2 case in impeg2d_init_function_ptr() can match. */ + ps_codec->e_processor_arch = ARCH_X86_AVX2; +#else + ps_codec->e_processor_arch = ARCH_X86_GENERIC; +#endif +#else + ps_codec->e_processor_arch = ARCH_X86_SSE42; +#endif + +} diff --git a/decoder/x86/impeg2d_function_selector_avx2.c b/decoder/x86/impeg2d_function_selector_avx2.c new file mode 100644 index 0000000..ab5d847 --- /dev/null +++ b/decoder/x86/impeg2d_function_selector_avx2.c @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector_avx2.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" + +/* Fills the four pf_idct_recon[] slots of the decoder state; slots 2 and 3 */ +/* receive the same routine. No other function pointer is written here. */ +void impeg2d_init_function_ptr_avx2(void *pv_codec) +{ + dec_state_t *ps_dec_state = pv_codec; + + ps_dec_state->pf_idct_recon[0] = &impeg2_idct_recon_dc; + ps_dec_state->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch; + ps_dec_state->pf_idct_recon[2] = &impeg2_idct_recon; + 
ps_dec_state->pf_idct_recon[3] = &impeg2_idct_recon; +} diff --git a/decoder/x86/impeg2d_function_selector_sse42.c b/decoder/x86/impeg2d_function_selector_sse42.c new file mode 100644 index 0000000..a4b6673 --- /dev/null +++ b/decoder/x86/impeg2d_function_selector_sse42.c @@ -0,0 +1,84 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" + +void impeg2d_init_function_ptr_sse42(void *pv_codec) +{ + dec_state_t *dec = (dec_state_t *)pv_codec; + + dec->pf_idct_recon[0] = &impeg2_idct_recon_dc_sse42; + dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch_sse42; + dec->pf_idct_recon[2] = &impeg2_idct_recon_sse42; + dec->pf_idct_recon[3] = &impeg2_idct_recon_sse42; + + dec->pf_copy_mb = &impeg2_copy_mb_sse42; + dec->pf_interpolate = &impeg2_interpolate_sse42; + dec->pf_halfx_halfy_8x8 = &impeg2_mc_halfx_halfy_8x8_sse42; + dec->pf_halfx_fully_8x8 = &impeg2_mc_halfx_fully_8x8_sse42; + dec->pf_fullx_halfy_8x8 = &impeg2_mc_fullx_halfy_8x8_sse42; + dec->pf_fullx_fully_8x8 = &impeg2_mc_fullx_fully_8x8_sse42; + + dec->pf_memset_8bit_8x8_block = &impeg2_memset_8bit_8x8_block_sse42; + dec->pf_memset_16bit_8x8_linear_block = &impeg2_memset0_16bit_8x8_linear_block_sse42; +} diff 
--git a/decoder/x86/impeg2d_function_selector_ssse3.c b/decoder/x86/impeg2d_function_selector_ssse3.c new file mode 100644 index 0000000..fb6c345 --- /dev/null +++ b/decoder/x86/impeg2d_function_selector_ssse3.c @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* impeg2d_function_selector.c +* +* @brief +* Contains functions to initialize function pointers used in mpeg2 +* +* @author +* Naveen +* +* @par List of Functions: +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +/* User include files */ +#include "iv_datatypedef.h" +#include "iv.h" +#include "ivd.h" +#include "ithread.h" + +#include "impeg2_macros.h" +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_format_conv.h" +#include "impeg2_mem_func.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_debug.h" +#include "impeg2d_structs.h" + +void impeg2d_init_function_ptr_ssse3(void *pv_codec) +{ + dec_state_t *dec = (dec_state_t *)pv_codec; + + dec->pf_idct_recon[0] = &impeg2_idct_recon_dc; + dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch; + dec->pf_idct_recon[2] = &impeg2_idct_recon; + dec->pf_idct_recon[3] = &impeg2_idct_recon; +} diff --git a/test/Android.mk b/test/Android.mk new file mode 100644 index 0000000..7807003 --- /dev/null +++ b/test/Android.mk @@ -0,0 +1,5 @@ +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# decoder +include $(LOCAL_PATH)/decoder.mk diff --git a/test/decoder.mk b/test/decoder.mk new file mode 100644 index 0000000..2aef0f9 --- /dev/null +++ b/test/decoder.mk @@ -0,0 +1,13 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := mpeg2dec +LOCAL_MODULE_TAGS := optional + +LOCAL_CFLAGS := 
-DPROFILE_ENABLE -DMD5_DISABLE -DARM -fPIC +LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/ +LOCAL_SRC_FILES := decoder/main.c +LOCAL_STATIC_LIBRARIES := libmpeg2dec + +include $(BUILD_EXECUTABLE) diff --git a/test/decoder/main.c b/test/decoder/main.c new file mode 100644 index 0000000..5930bd1 --- /dev/null +++ b/test/decoder/main.c @@ -0,0 +1,3135 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/*****************************************************************************/ +/* */ +/* File Name : main.c */ +/* */ +/* Description : Contains an application that demonstrates use of HEVC*/ +/* decoder API */ +/* */ +/* List of Functions : */ +/* */ +/* Issues / Problems : None */ +/* */ +/* Revision History : */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 Harish Initial Version */ +/*****************************************************************************/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#ifdef X86_MINGW +#include <signal.h> +#endif + +#ifndef IOS +#include <malloc.h> +#endif +#ifdef IOS_DISPLAY +#include "cast_types.h" +#else +#include "iv_datatypedef.h" +#endif + +#include "iv.h" +#include "ivd.h" +#include "impeg2d.h" +#include "ithread.h" + +#ifdef WINDOWS_TIMER +#include <windows.h> +#else +#include <sys/time.h> +#endif + +#define ALIGN8(x) ((((x) + 7) >> 3) << 3) +#define NUM_DISPLAY_BUFFERS 4 +#define DEFAULT_FPS 30 + + +#define ENABLE_DEGRADE 0 +#define MAX_DISP_BUFFERS 64 +#define EXTRA_DISP_BUFFERS 0 +#define STRLENGTH 1000 + +//#define TEST_FLUSH +#define FLUSH_FRM_CNT 100 + + +#ifdef IOS +#define PATHLENMAX 500 +char filename_with_path[PATHLENMAX]; +#endif + +#ifdef PROFILE_ENABLE +#ifdef WINDOWS_TIMER +typedef LARGE_INTEGER TIMER; +#else +typedef struct timeval TIMER; +#endif +#else +typedef WORD32 TIMER; +#endif + +#ifdef PROFILE_ENABLE +#ifdef WINDOWS_TIMER +#define GETTIME(timer) QueryPerformanceCounter(timer); +#else +#define GETTIME(timer) gettimeofday(timer,NULL); +#endif + +#ifdef WINDOWS_TIMER +#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ +{ \ + TIMER s_temp_time; \ + s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ; \ + s_elapsed_time = 
(UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart ) * 1000000); \ +} +#else +#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \ + s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec); +#endif + +#else +#define GETTIME(timer) +#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) +#endif + + +/* Function declarations */ +#ifndef MD5_DISABLE +void calc_md5_cksum(UWORD8 *pu1_inbuf, UWORD32 u4_stride, UWORD32 u4_width, UWORD32 u4_height, UWORD8 *pu1_cksum_p); +#else +#define calc_md5_cksum(a, b, c, d, e) +#endif +#ifdef SDL_DISPLAY +void* sdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void sdl_alloc_disp_buffers(void *); +void sdl_display(void *, WORD32); +void sdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void sdl_disp_deinit(void *); +void sdl_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T sdl_get_color_fmt(void); +UWORD32 sdl_get_stride(void); +#endif + +#ifdef INTEL_CE5300 +void* gdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void gdl_alloc_disp_buffers(void *); +void gdl_display(void *, WORD32); +void gdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void gdl_disp_deinit(void *); +void gdl_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T gdl_get_color_fmt(void); +UWORD32 gdl_get_stride(void); +#endif + +#ifdef FBDEV_DISPLAY +void* fbd_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); +void fbd_alloc_disp_buffers(void *); +void fbd_display(void *, WORD32); +void fbd_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void fbd_disp_deinit(void *); +void fbd_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T fbd_get_color_fmt(void); +UWORD32 fbd_get_stride(void); +#endif + +#ifdef IOS_DISPLAY +void* ios_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, 
WORD32, WORD32 *, WORD32 *); +void ios_alloc_disp_buffers(void *); +void ios_display(void *, WORD32); +void ios_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); +void ios_disp_deinit(void *); +void ios_disp_usleep(UWORD32); +IV_COLOR_FORMAT_T ios_get_color_fmt(void); +UWORD32 ios_get_stride(void); +#endif + +typedef struct +{ + UWORD32 u4_piclen_flag; + UWORD32 u4_file_save_flag; + UWORD32 u4_chksum_save_flag; + UWORD32 u4_max_frm_ts; + IV_COLOR_FORMAT_T e_output_chroma_format; + IVD_ARCH_T e_arch; + IVD_SOC_T e_soc; + UWORD32 dump_q_rd_idx; + UWORD32 dump_q_wr_idx; + WORD32 disp_q_wr_idx; + WORD32 disp_q_rd_idx; + + void *cocodec_obj; + UWORD32 share_disp_buf; + UWORD32 num_disp_buf; + UWORD32 b_pic_present; + WORD32 i4_degrade_type; + WORD32 i4_degrade_pics; + UWORD32 u4_num_cores; + UWORD32 disp_delay; + WORD32 trace_enable; + CHAR ac_trace_fname[STRLENGTH]; + CHAR ac_piclen_fname[STRLENGTH]; + CHAR ac_ip_fname[STRLENGTH]; + CHAR ac_op_fname[STRLENGTH]; + CHAR ac_op_chksum_fname[STRLENGTH]; + ivd_out_bufdesc_t s_disp_buffers[MAX_DISP_BUFFERS]; + iv_yuv_buf_t s_disp_frm_queue[MAX_DISP_BUFFERS]; + UWORD32 s_disp_frm_id_queue[MAX_DISP_BUFFERS]; + UWORD32 loopback; + UWORD32 display; + UWORD32 full_screen; + UWORD32 fps; + UWORD32 max_wd; + UWORD32 max_ht; + UWORD32 max_level; + + UWORD32 u4_strd; + + /* For signalling to display thread */ + UWORD32 u4_pic_wd; + UWORD32 u4_pic_ht; + + /* For IOS diplay */ + WORD32 i4_screen_wd; + WORD32 i4_screen_ht; + + //UWORD32 u4_output_present; + WORD32 quit; + WORD32 paused; + + + void *pv_disp_ctx; + void *display_thread_handle; + WORD32 display_thread_created; + volatile WORD32 display_init_done; + volatile WORD32 display_deinit_flag; + + void* (*disp_init)(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *); + void (*alloc_disp_buffers)(void *); + void (*display_buffer)(void *, WORD32); + void (*set_disp_buffers)(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **); + void 
(*disp_deinit)(void *); + void (*disp_usleep)(UWORD32); + IV_COLOR_FORMAT_T(*get_color_fmt)(void); + UWORD32(*get_stride)(void); +}vid_dec_ctx_t; + + + +typedef enum +{ + INVALID, + HELP, + VERSION, + INPUT_FILE, + OUTPUT, + CHKSUM, + SAVE_OUTPUT, + SAVE_CHKSUM, + CHROMA_FORMAT, + NUM_FRAMES, + NUM_CORES, + + SHARE_DISPLAY_BUF, + LOOPBACK, + DISPLAY, + FULLSCREEN, + FPS, + TRACE, + MAX_WD, + MAX_HT, + MAX_LEVEL, + CONFIG, + + DEGRADE_TYPE, + DEGRADE_PICS, + ARCH, + SOC, + PICLEN, + PICLEN_FILE, +}ARGUMENT_T; + +typedef struct +{ + CHAR argument_shortname[4]; + CHAR argument_name[128]; + ARGUMENT_T argument; + CHAR description[512]; +}argument_t; + +static const argument_t argument_mapping[] = +{ + { "-h", "--help", HELP, + "Print this help\n" }, + { "-c", "--config", CONFIG, + "config file (Default: test.cfg)\n" }, + + { "-v", "--version", VERSION, + "Version information\n" }, + { "-i", "--input", INPUT_FILE, + "Input file\n" }, + { "-o", "--output", OUTPUT, + "Output file\n" }, + { "--", "--piclen", PICLEN, + "Flag to signal if the decoder has to use a file containing number of bytes in each picture to be fed in each call\n" }, + { "--", "--piclen_file", PICLEN_FILE, + "File containing number of bytes in each picture - each line containing one size\n" }, + { "--", "--chksum", CHKSUM, + "Output MD5 Checksum file\n" }, + { "-s", "--save_output", SAVE_OUTPUT, + "Save Output file\n" }, + { "--", "--save_chksum", SAVE_CHKSUM, + "Save Check sum file\n" }, + { "--", "--chroma_format", CHROMA_FORMAT, + "Output Chroma format Supported values YUV_420P, YUV_422ILE, RGB_565, YUV_420SP_UV, YUV_420SP_VU\n" }, + { "-n", "--num_frames", NUM_FRAMES, + "Number of frames to be decoded\n" }, + { "--", "--num_cores", NUM_CORES, + "Number of cores to be used\n" }, + { "--", "--share_display_buf", SHARE_DISPLAY_BUF, + "Enable shared display buffer mode\n" }, + + { "--", "--loopback", LOOPBACK, + "Enable playback in a loop\n" }, + { "--", "--display", DISPLAY, + "Enable display (uses 
SDL)\n" }, + { "--", "--fullscreen", FULLSCREEN, + "Enable full screen (Only for GDL and SDL)\n" }, + { "--", "--fps", FPS, + "FPS to be used for display \n" }, + { "-i", "--trace", TRACE, + "Trace file\n" }, + { "--", "--max_wd", MAX_WD, + "Maximum width (Default: 2560) \n" }, + { "--", "--max_ht", MAX_HT, + "Maximum height (Default: 1600)\n" }, + { "--", "--arch", ARCH, + "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" }, + { "--", "--soc", SOC, + "Set SOC. Supported values GENERIC, HISI_37X \n" }, + +#if 0 + { "--", "--degrade_type", DEGRADE_TYPE, + "Degrade type : 0: No degrade 0th bit set : Disable SAO 1st bit set : Disable deblocking 2nd bit set : Faster inter prediction filters 3rd bit set : Fastest inter prediction filters\n" }, + { "--", "--degrade_pics", DEGRADE_PICS, + "Degrade pics : 0 : No degrade 1 : Only on non-reference frames 2 : Do not degrade every 4th or key frames 3 : All non-key frames 4 : All frames" }, + + { "--", "--max_level", MAX_LEVEL, + "Maximum Decoder Level (Default: 50)\n" }, +#endif +}; + +#define PEAK_WINDOW_SIZE 8 +#define MAX_FRAME_WIDTH 2560 +#define MAX_FRAME_HEIGHT 1600 +#define MAX_LEVEL_SUPPORTED 50 +#define MAX_REF_FRAMES 16 +#define MAX_REORDER_FRAMES 16 +#define DEFAULT_SHARE_DISPLAY_BUF 0 +#define STRIDE 0 +#define DEFAULT_NUM_CORES 1 + +#define DUMP_SINGLE_BUF 0 +#define IV_ISFATALERROR(x) (((x) >> IVD_FATALERROR) & 0x1) + +#define ivd_api_function impeg2d_api_function + +#ifdef IOS +char filename_trace[PATHLENMAX]; +#endif + +#if ANDROID_NDK +/*****************************************************************************/ +/* */ +/* Function Name : raise */ +/* */ +/* Description : Needed as a workaround when the application is built in */ +/* Android NDK. 
This is an exception to be called for divide*/ +/* by zero error */ +/* */ +/* Inputs : a */ +/* Globals : */ +/* Processing : None */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ +int raise(int a) +{ + printf("Divide by zero\n"); + return 0; +} +#endif + +#ifdef _WIN32 +/*****************************************************************************/ +/* Function to print library calls */ +/*****************************************************************************/ +/*****************************************************************************/ +/* */ +/* Function Name : memalign */ +/* */ +/* Description : Returns malloc data. Ideally should return aligned memory*/ +/* support alignment will be added later */ +/* */ +/* Inputs : alignment */ +/* size */ +/* Globals : */ +/* Processing : */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void* app_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return (void *)_aligned_malloc(size, alignment); +} + +void app_aligned_free(void *pv_buf) +{ + _aligned_free(pv_buf); + return; +} +#endif + +#if IOS +void* app_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return malloc(size); +} + +void app_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} +#endif + +#if (!defined(IOS)) && (!defined(_WIN32)) +void* app_aligned_malloc(WORD32 alignment, WORD32 size) +{ + return memalign(alignment, size); +} + +void app_aligned_free(void *pv_buf) +{ + free(pv_buf); + return; +} +#endif + +/*****************************************************************************/ +/* */ +/* 
Function Name : set_degrade */ +/* */ +/* Description : Control call to set degrade level */ +/* */ +/* */ +/* Inputs : codec_obj - Codec Handle */ +/* type - degrade level value between 0 to 4 */ +/* 0 : No degrade */ +/* 1st bit : Disable SAO */ +/* 2nd bit : Disable Deblock */ +/* 3rd bit : Faster MC for non-ref */ +/* 4th bit : Fastest MC for non-ref */ +/* pics - Pictures that are are degraded */ +/* 0 : No degrade */ +/* 1 : Non-ref pictures */ +/* 2 : Pictures at given interval are not degraded */ +/* 3 : All non-key pictures */ +/* 4 : All pictures */ +/* Globals : */ +/* Processing : Calls degrade control to the codec */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T set_degrade(void *codec_obj, UWORD32 type, WORD32 pics) +{ + IV_API_CALL_STATUS_T e_dec_status = IV_SUCCESS; +#if 0 + impeg2d_ctl_degrade_ip_t s_ctl_ip; + impeg2d_ctl_degrade_op_t s_ctl_op; + void *pv_api_ip, *pv_api_op; + + + s_ctl_ip.u4_size = sizeof(impeg2d_ctl_degrade_ip_t); + s_ctl_ip.i4_degrade_type = type; + s_ctl_ip.i4_nondegrade_interval = 4; + s_ctl_ip.i4_degrade_pics = pics; + + s_ctl_op.u4_size = sizeof(impeg2d_ctl_degrade_op_t); + + pv_api_ip = (void *)&s_ctl_ip; + pv_api_op = (void *)&s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_DEGRADE; + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, pv_api_ip, pv_api_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in setting degrade level \n"); + } +#endif + ((void)(codec_obj)); + ((void)(type)); + ((void)(pics)); + return (e_dec_status); + +} + +/*****************************************************************************/ +/* */ +/* Function Name : enable_skipb_frames */ +/* */ 
+/* Description : Control call to enable skipping of b frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls enable skip B frames control */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T enable_skipb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_B; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Enable SkipB frames \n"); + } + + return e_dec_status; +} +/*****************************************************************************/ +/* */ +/* Function Name : disable_skipb_frames */ +/* */ +/* Description : Control call to disable skipping of b frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls disable B frame skip control */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T disable_skipb_frames(void *codec_obj, + vid_dec_ctx_t 
*ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Disable SkipB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : enable_skippb_frames */ +/* */ +/* Description : Control call to enable skipping of P & B frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls enable skip P and B frames control */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T enable_skippb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_PB; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t 
*)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Enable SkipPB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : disable_skippb_frames */ +/* */ +/* Description : Control call to disable skipping of P and B frames */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls disable P and B frame skip control */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T disable_skippb_frames(void *codec_obj, + vid_dec_ctx_t *ps_app_ctx) +{ + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_ctl_ip.u4_disp_wd = ps_app_ctx->u4_strd; + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Disable SkipPB frames\n"); + } + + return e_dec_status; +} + +/*****************************************************************************/ +/* */ +/* Function Name : release_disp_frame */ +/* */ +/* Description : Calls release display control - Used to signal to the */ +/* decoder that this particular buffer has been displayed */ +/* and that the codec is now free to write to this buffer */ +/* */ +/* */ +/* Inputs : codec_obj : Codec 
Handle */ +/* buf_id : Buffer Id of the buffer to be released */ +/* This id would have been returned earlier by */ +/* the codec */ +/* Globals : */ +/* Processing : Calls Release Display call */ +/* */ +/* Outputs : */ +/* Returns : Status of release display call */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T release_disp_frame(void *codec_obj, UWORD32 buf_id) +{ + ivd_rel_display_frame_ip_t s_video_rel_disp_ip; + ivd_rel_display_frame_op_t s_video_rel_disp_op; + IV_API_CALL_STATUS_T e_dec_status; + + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); + s_video_rel_disp_ip.u4_disp_buf_id = buf_id; + + e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_rel_disp_ip, + (void *)&s_video_rel_disp_op); + if(IV_SUCCESS != e_dec_status) + { + printf("Error in Release Disp frame\n"); + } + + + return (e_dec_status); +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_version */ +/* */ +/* Description : Control call to get codec version */ +/* */ +/* */ +/* Inputs : codec_obj : Codec handle */ +/* Globals : */ +/* Processing : Calls enable skip B frames control */ +/* */ +/* Outputs : */ +/* Returns : Control call return status */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +IV_API_CALL_STATUS_T get_version(void *codec_obj) +{ + ivd_ctl_getversioninfo_ip_t s_ctl_dec_ip; + ivd_ctl_getversioninfo_op_t s_ctl_dec_op; + UWORD8 au1_buf[512]; + IV_API_CALL_STATUS_T status; 
+ s_ctl_dec_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_dec_ip.e_sub_cmd = IVD_CMD_CTL_GETVERSION; + s_ctl_dec_ip.u4_size = sizeof(ivd_ctl_getversioninfo_ip_t); + s_ctl_dec_op.u4_size = sizeof(ivd_ctl_getversioninfo_op_t); + s_ctl_dec_ip.pv_version_buffer = au1_buf; + s_ctl_dec_ip.u4_version_buffer_size = sizeof(au1_buf); + + status = ivd_api_function((iv_obj_t *)codec_obj, + (void *)&(s_ctl_dec_ip), + (void *)&(s_ctl_dec_op)); + + if(status != IV_SUCCESS) + { + printf("Error in Getting Version number e_dec_status = %d u4_error_code = %x\n", + status, s_ctl_dec_op.u4_error_code); + } + else + { + printf("Ittiam Decoder Version number: %s\n", + (char *)s_ctl_dec_ip.pv_version_buffer); + } + return status; +} +/*****************************************************************************/ +/* */ +/* Function Name : codec_exit */ +/* */ +/* Description : handles unrecoverable errors */ +/* Inputs : Error message */ +/* Globals : None */ +/* Processing : Prints error message to console and exits. 
*/ +/* Outputs : Error mesage to the console */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void codec_exit(CHAR *pc_err_message) +{ + printf("%s\n", pc_err_message); + exit(-1); +} + +/*****************************************************************************/ +/* */ +/* Function Name : dump_output */ +/* */ +/* Description : Used to dump output YUV */ +/* Inputs : App context, disp output desc, File pointer */ +/* Globals : None */ +/* Processing : Dumps to a file */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes (Describe the changes made) */ +/* 07 06 2006 Sankar Creation */ +/* */ +/*****************************************************************************/ +void dump_output(vid_dec_ctx_t *ps_app_ctx, + iv_yuv_buf_t *ps_disp_frm_buf, + UWORD32 u4_disp_frm_id, + FILE *ps_op_file, + FILE *ps_op_chksum_file, + WORD32 i4_op_frm_ts, + UWORD32 file_save, + UWORD32 chksum_save) + +{ + + UWORD32 i; + iv_yuv_buf_t s_dump_disp_frm_buf; + UWORD32 u4_disp_id; + + memset(&s_dump_disp_frm_buf, 0, sizeof(iv_yuv_buf_t)); + + if(ps_app_ctx->share_disp_buf) + { + if(ps_app_ctx->dump_q_wr_idx == MAX_DISP_BUFFERS + ) + ps_app_ctx->dump_q_wr_idx = 0; + + if(ps_app_ctx->dump_q_rd_idx == MAX_DISP_BUFFERS + ) + ps_app_ctx->dump_q_rd_idx = 0; + + ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_wr_idx] = + *ps_disp_frm_buf; + ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_wr_idx] = + u4_disp_frm_id; + ps_app_ctx->dump_q_wr_idx++; + + if((WORD32)i4_op_frm_ts >= (WORD32)(ps_app_ctx->disp_delay - 1)) + { + s_dump_disp_frm_buf = + ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_rd_idx]; + u4_disp_id = + ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_rd_idx]; + 
ps_app_ctx->dump_q_rd_idx++; + } + else + { + return; + } + } + else + { + s_dump_disp_frm_buf = *ps_disp_frm_buf; + u4_disp_id = u4_disp_frm_id; + } + if(1 == ps_app_ctx->share_disp_buf) + release_disp_frame(ps_app_ctx->cocodec_obj, u4_disp_id); + + if(0 == file_save && 0 == chksum_save) + return; + + if(NULL == s_dump_disp_frm_buf.pv_y_buf) + return; + + if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P) + { +#if DUMP_SINGLE_BUF + { + UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40); + + UWORD32 size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40)); + fwrite(buf, 1, size, ps_op_file); + + } +#else + if(0 != file_save) + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd; + } + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_u_strd; + } + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_v_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_v_strd; + } + + } + + if(0 != chksum_save) + { + UWORD8 au1_y_chksum[16]; + UWORD8 au1_u_chksum[16]; + UWORD8 au1_v_chksum[16]; + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_y_buf, + s_dump_disp_frm_buf.u4_y_strd, + s_dump_disp_frm_buf.u4_y_wd, + s_dump_disp_frm_buf.u4_y_ht, + au1_y_chksum); + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf, + s_dump_disp_frm_buf.u4_u_strd, + s_dump_disp_frm_buf.u4_u_wd, + s_dump_disp_frm_buf.u4_u_ht, + au1_u_chksum); + calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf, + s_dump_disp_frm_buf.u4_v_strd, + s_dump_disp_frm_buf.u4_v_wd, + s_dump_disp_frm_buf.u4_v_ht, + au1_v_chksum); + + 
fwrite(au1_y_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file); + } +#endif + } + else if((ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_UV) + || (ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_VU)) + { +#if DUMP_SINGLE_BUF + { + + UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40); + + UWORD32 size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40)); + fwrite(buf, 1, size, ps_op_file); + } +#else + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd; + } + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file); + buf += s_dump_disp_frm_buf.u4_u_strd; + } + } +#endif + } + else if(ps_app_ctx->e_output_chroma_format == IV_RGBA_8888) + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd * 4, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd * 4; + } + } + else + { + UWORD8 *buf; + + buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf; + for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++) + { + fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_strd * 2, ps_op_file); + buf += s_dump_disp_frm_buf.u4_y_strd * 2; + } + } + + fflush(ps_op_file); + fflush(ps_op_chksum_file); + +} + + +/*****************************************************************************/ +/* */ +/* Function Name : print_usage */ +/* */ +/* Description : Prints argument format */ +/* */ +/* */ +/* Inputs : */ +/* Globals : */ +/* Processing : Prints argument format */ +/* */ +/* Outputs : */ +/* Returns : */ +/* */ 
+/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void print_usage(void) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + printf("\nUsage:\n"); + while(i < num_entries) + { + printf("%-32s\t %s", argument_mapping[i].argument_name, + argument_mapping[i].description); + i++; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +ARGUMENT_T get_argument(CHAR *name) +{ + WORD32 i = 0; + WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_t); + while(i < num_entries) + { + if((0 == strcmp(argument_mapping[i].argument_name, name)) || + ((0 == strcmp(argument_mapping[i].argument_shortname, name)) && + (0 != strcmp(argument_mapping[i].argument_shortname, "--")))) + { + return argument_mapping[i].argument; + } + i++; + } + return INVALID; +} + +/*****************************************************************************/ +/* */ +/* Function Name : get_argument */ +/* */ +/* Description : Gets argument for a given string */ +/* */ +/* */ +/* Inputs : name */ +/* Globals : */ +/* Processing : Searches the given string in the array and returns */ +/* appropriate argument ID */ +/* */ +/* Outputs : Argument ID */ +/* Returns : Argument ID */ +/* */ +/* Issues : */ +/* */ +/* 
Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) +{ + ARGUMENT_T arg; + + arg = get_argument(argument); + switch(arg) + { + case HELP: + print_usage(); + exit(-1); + case VERSION: + break; + case INPUT_FILE: + sscanf(value, "%s", ps_app_ctx->ac_ip_fname); + //input_passed = 1; + break; + + case OUTPUT: + sscanf(value, "%s", ps_app_ctx->ac_op_fname); + break; + + case CHKSUM: + sscanf(value, "%s", ps_app_ctx->ac_op_chksum_fname); + break; + + case SAVE_OUTPUT: + sscanf(value, "%d", &ps_app_ctx->u4_file_save_flag); + break; + + case SAVE_CHKSUM: + sscanf(value, "%d", &ps_app_ctx->u4_chksum_save_flag); + break; + + case CHROMA_FORMAT: + if((strcmp(value, "YUV_420P")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420P; + else if((strcmp(value, "YUV_422ILE")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_422ILE; + else if((strcmp(value, "RGB_565")) == 0) + ps_app_ctx->e_output_chroma_format = IV_RGB_565; + else if((strcmp(value, "RGBA_8888")) == 0) + ps_app_ctx->e_output_chroma_format = IV_RGBA_8888; + else if((strcmp(value, "YUV_420SP_UV")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_UV; + else if((strcmp(value, "YUV_420SP_VU")) == 0) + ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_VU; + else + { + printf("\nInvalid colour format setting it to IV_YUV_420P\n"); + ps_app_ctx->e_output_chroma_format = IV_YUV_420P; + } + + break; + case NUM_FRAMES: + sscanf(value, "%d", &ps_app_ctx->u4_max_frm_ts); + break; + + case NUM_CORES: + sscanf(value, "%d", &ps_app_ctx->u4_num_cores); + break; + case DEGRADE_PICS: + sscanf(value, "%d", &ps_app_ctx->i4_degrade_pics); + ps_app_ctx->i4_degrade_pics = 0; + printf("degrade_pics is not supported. 
Setting it to zero"); + break; + case DEGRADE_TYPE: + sscanf(value, "%d", &ps_app_ctx->i4_degrade_type); + break; + case SHARE_DISPLAY_BUF: + sscanf(value, "%d", &ps_app_ctx->share_disp_buf); + break; + case LOOPBACK: + sscanf(value, "%d", &ps_app_ctx->loopback); + break; + case DISPLAY: +#if defined(SDL_DISPLAY) || defined(FBDEV_DISPLAY) || defined(INTEL_CE5300) || defined(IOS_DISPLAY) + sscanf(value, "%d", &ps_app_ctx->display); +#else + ps_app_ctx->display = 0; +#endif + break; + case FULLSCREEN: + sscanf(value, "%d", &ps_app_ctx->full_screen); + break; + case FPS: + sscanf(value, "%d", &ps_app_ctx->fps); + if(ps_app_ctx->fps <= 0) + ps_app_ctx->fps = DEFAULT_FPS; + break; + case MAX_WD: + sscanf(value, "%d", &ps_app_ctx->max_wd); + break; + case MAX_HT: + sscanf(value, "%d", &ps_app_ctx->max_ht); + break; + case MAX_LEVEL: + sscanf(value, "%d", &ps_app_ctx->max_level); + break; + case ARCH: + if((strcmp(value, "ARM_NONEON")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_NONEON; + else if((strcmp(value, "ARM_A9Q")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A9Q; + else if((strcmp(value, "ARM_V8")) == 0) + ps_app_ctx->e_arch = ARCH_ARMV8_GENERIC; + else if((strcmp(value, "ARM_A7")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A7; + else if((strcmp(value, "ARM_A5")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_A5; + else if((strcmp(value, "ARM_NEONINTR")) == 0) + ps_app_ctx->e_arch = ARCH_ARM_NEONINTR; + else if((strcmp(value, "X86_GENERIC")) == 0) + ps_app_ctx->e_arch = ARCH_X86_GENERIC; + else if((strcmp(value, "X86_SSSE3")) == 0) + ps_app_ctx->e_arch = ARCH_X86_SSSE3; + else if((strcmp(value, "X86_SSE42")) == 0) + ps_app_ctx->e_arch = ARCH_X86_SSE42; + else if((strcmp(value, "X86_AVX2")) == 0) + ps_app_ctx->e_arch = ARCH_X86_AVX2; + else if((strcmp(value, "MIPS_GENERIC")) == 0) + ps_app_ctx->e_arch = ARCH_MIPS_GENERIC; + else if((strcmp(value, "MIPS_32")) == 0) + ps_app_ctx->e_arch = ARCH_MIPS_32; + else + { + printf("\nInvalid Arch. 
Setting it to ARM_A9Q\n"); + ps_app_ctx->e_arch = ARCH_ARM_A9Q; + } + + break; + case SOC: + if((strcmp(value, "GENERIC")) == 0) + ps_app_ctx->e_soc = SOC_GENERIC; + else if((strcmp(value, "HISI_37X")) == 0) + ps_app_ctx->e_soc = SOC_HISI_37X; + else + { + ps_app_ctx->e_soc = atoi(value); +/* + printf("\nInvalid SOC. Setting it to GENERIC\n"); + ps_app_ctx->e_soc = SOC_GENERIC; +*/ + } + break; + case PICLEN: + sscanf(value, "%d", &ps_app_ctx->u4_piclen_flag); + break; + + case PICLEN_FILE: + sscanf(value, "%s", ps_app_ctx->ac_piclen_fname); + break; + + case INVALID: + default: + printf("Ignoring argument : %s\n", argument); + break; + } +} + +/*****************************************************************************/ +/* */ +/* Function Name : read_cfg_file */ +/* */ +/* Description : Reads arguments from a configuration file */ +/* */ +/* */ +/* Inputs : ps_app_ctx : Application context */ +/* fp_cfg_file : Configuration file handle */ +/* Globals : */ +/* Processing : Parses the arguments and fills in the application context*/ +/* */ +/* Outputs : Arguments parsed */ +/* Returns : None */ +/* */ +/* Issues : */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* */ +/*****************************************************************************/ + +void read_cfg_file(vid_dec_ctx_t *ps_app_ctx, FILE *fp_cfg_file) +{ + + CHAR line[STRLENGTH]; + CHAR description[STRLENGTH]; + CHAR value[STRLENGTH]; + CHAR argument[STRLENGTH]; + void *ret; + while(0 == feof(fp_cfg_file)) + { + line[0] = '\0'; + ret = fgets(line, STRLENGTH, fp_cfg_file); + if(NULL == ret) + break; + argument[0] = '\0'; + /* Reading Input File Name */ + sscanf(line, "%s %s %s", argument, value, description); + if(argument[0] == '\0') + continue; + + parse_argument(ps_app_ctx, argument, value); + } + + +} + +/*! 
+************************************************************************** +* \if Function name : dispq_producer_dequeue \endif +* +* \brief +* This function gets a free buffer index where display data can be written +* This is a blocking call and can be exited by setting quit to true in +* the application context +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_producer_dequeue(vid_dec_ctx_t *ps_app_ctx) +{ + WORD32 idx; + + /* If there is no free buffer wait */ + + while(((ps_app_ctx->disp_q_wr_idx + 1) % NUM_DISPLAY_BUFFERS) == ps_app_ctx->disp_q_rd_idx) + { + + ithread_msleep(1); + + if(ps_app_ctx->quit) + return (-1); + } + + idx = ps_app_ctx->disp_q_wr_idx; + return (idx); +} + +/*! +************************************************************************** +* \if Function name : dispq_producer_queue \endif +* +* \brief +* This function adds buffer which can be displayed +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_producer_queue(vid_dec_ctx_t *ps_app_ctx) +{ + ps_app_ctx->disp_q_wr_idx++; + if(ps_app_ctx->disp_q_wr_idx == NUM_DISPLAY_BUFFERS) + ps_app_ctx->disp_q_wr_idx = 0; + + return (0); +} +/*! 
+************************************************************************** +* \if Function name : dispq_consumer_dequeue \endif +* +* \brief +* This function gets a free buffer index where display data can be written +* This is a blocking call and can be exited by setting quit to true in +* the application context +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_consumer_dequeue(vid_dec_ctx_t *ps_app_ctx) +{ + WORD32 idx; + + /* If there is no free buffer wait */ + + while(ps_app_ctx->disp_q_wr_idx == ps_app_ctx->disp_q_rd_idx) + { + + ithread_msleep(1); + + if(ps_app_ctx->quit) + return (-1); + } + + idx = ps_app_ctx->disp_q_rd_idx; + return (idx); +} + +/*! +************************************************************************** +* \if Function name : dispq_producer_queue \endif +* +* \brief +* This function adds buffer which can be displayed +* +* \param[in] ps_app_ctx : Pointer to application context +* +* \return +* returns Next free buffer index for producer +* +* \author +* Ittiam +* +************************************************************************** +*/ +WORD32 dispq_consumer_queue(vid_dec_ctx_t *ps_app_ctx) +{ + ps_app_ctx->disp_q_rd_idx++; + if(ps_app_ctx->disp_q_rd_idx == NUM_DISPLAY_BUFFERS) + ps_app_ctx->disp_q_rd_idx = 0; + + return (0); +} + +/*****************************************************************************/ +/* */ +/* Function Name : display_thread */ +/* */ +/* Description : Thread to display the frame */ +/* */ +/* */ +/* Inputs : pv_ctx : Application context */ +/* */ +/* Globals : */ +/* Processing : Wait for a buffer to get produced by decoder and display */ +/* that frame */ +/* */ +/* Outputs : */ +/* Returns : None */ +/* */ +/* Issues : Pause followed by quit is making some deadlock condn */ +/* If decoder was lagging 
initially and then fasten up, */ +/* display will also go at faster rate till it reaches */ +/* equilibrium wrt the initial time */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 05 2013 100578 Initial Version */ +/* */ +/*****************************************************************************/ + +WORD32 display_thread(void *pv_ctx) +{ + vid_dec_ctx_t *ps_app_ctx = (vid_dec_ctx_t *)pv_ctx; + + + UWORD32 frm_duration; /* in us */ + UWORD32 current_time; + UWORD32 expected_time; + TIMER s_end_timer; + TIMER s_first_frame_time; + UWORD32 first_frame_displayed; + +#ifdef WINDOWS_TIMER + TIMER frequency; +#endif + + +#ifdef WINDOWS_TIMER + QueryPerformanceFrequency(&frequency); +#endif + first_frame_displayed = 0; + expected_time = 0; + frm_duration = 1000000 / ps_app_ctx->fps; + + /* Init display and allocate display buffers */ + ps_app_ctx->pv_disp_ctx = (void *)ps_app_ctx->disp_init(ps_app_ctx->u4_pic_wd, + ps_app_ctx->u4_pic_ht, + ps_app_ctx->i4_screen_wd, + ps_app_ctx->i4_screen_ht, + ps_app_ctx->max_wd, + ps_app_ctx->max_ht, + ps_app_ctx->full_screen, + &ps_app_ctx->quit, + &ps_app_ctx->paused); + ps_app_ctx->alloc_disp_buffers(ps_app_ctx->pv_disp_ctx); + + ps_app_ctx->display_init_done = 1; + + while(1) + { + WORD32 rd_idx; + + rd_idx = dispq_consumer_dequeue(ps_app_ctx); + if(ps_app_ctx->quit) + break; + + ps_app_ctx->display_buffer(ps_app_ctx->pv_disp_ctx, rd_idx); + + if(0 == first_frame_displayed) + { + GETTIME(&s_first_frame_time); + first_frame_displayed = 1; + } + + /*********************************************************************/ + /* Sleep based on the expected time of arrival of current buffer and */ + /* the Current frame */ + /*********************************************************************/ + + GETTIME(&s_end_timer); + ELAPSEDTIME(s_first_frame_time, s_end_timer, current_time, frequency); + + /* time in micro second */ + expected_time += frm_duration; + + //printf("current_time %d expected_time %d 
diff %d \n", current_time, expected_time, (expected_time - current_time)); + /* sleep for the diff. in time */ + if(current_time < expected_time) + ps_app_ctx->disp_usleep((expected_time - current_time)); + else + expected_time += (current_time - expected_time); + + dispq_consumer_queue(ps_app_ctx); + + } + + + while(0 == ps_app_ctx->display_deinit_flag) + { + ps_app_ctx->disp_usleep(1000); + } + ps_app_ctx->disp_deinit(ps_app_ctx->pv_disp_ctx); + + /* destroy the display thread */ + ithread_exit(ps_app_ctx->display_thread_handle); + + return 0; +} + +void flush_output(iv_obj_t *codec_obj, + vid_dec_ctx_t *ps_app_ctx, + ivd_out_bufdesc_t *ps_out_buf, + UWORD8 *pu1_bs_buf, + UWORD32 *pu4_op_frm_ts, + FILE *ps_op_file, + FILE *ps_op_chksum_file, + UWORD32 u4_ip_frm_ts, + UWORD32 u4_bytes_remaining) +{ + WORD32 ret; + + do + { + + ivd_ctl_flush_ip_t s_ctl_ip; + ivd_ctl_flush_op_t s_ctl_op; + + if(*pu4_op_frm_ts >= (ps_app_ctx->u4_max_frm_ts + ps_app_ctx->disp_delay)) + break; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(ret != IV_SUCCESS) + { + printf("Error in Setting the decoder in flush mode\n"); + } + + if(IV_SUCCESS == ret) + { + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_video_decode_ip.u4_ts = u4_ip_frm_ts; + s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] = + ps_out_buf->u4_min_out_buf_size[0]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] = + ps_out_buf->u4_min_out_buf_size[1]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] = + 
ps_out_buf->u4_min_out_buf_size[2]; + + s_video_decode_ip.s_out_buffer.pu1_bufs[0] = + ps_out_buf->pu1_bufs[0]; + s_video_decode_ip.s_out_buffer.pu1_bufs[1] = + ps_out_buf->pu1_bufs[1]; + s_video_decode_ip.s_out_buffer.pu1_bufs[2] = + ps_out_buf->pu1_bufs[2]; + s_video_decode_ip.s_out_buffer.u4_num_bufs = + ps_out_buf->u4_num_bufs; + + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /*****************************************************************************/ + /* API Call: Video Decode */ + /*****************************************************************************/ + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + if(1 == s_video_decode_op.u4_output_present) + { + dump_output(ps_app_ctx, &(s_video_decode_op.s_disp_frm_buf), + s_video_decode_op.u4_disp_buf_id, ps_op_file, + ps_op_chksum_file, + *pu4_op_frm_ts, ps_app_ctx->u4_file_save_flag, + ps_app_ctx->u4_chksum_save_flag); + + (*pu4_op_frm_ts)++; + } + } + }while(IV_SUCCESS == ret); + +} + +#ifdef X86_MINGW +void sigsegv_handler() +{ + printf("Segmentation fault, Exiting.. 
\n"); + exit(-1); +} +#endif + +UWORD32 default_get_stride(void) +{ + return 0; +} + + +IV_COLOR_FORMAT_T default_get_color_fmt(void) +{ + return IV_YUV_420P; +} +/*****************************************************************************/ +/* */ +/* Function Name : main */ +/* */ +/* Description : Application to demonstrate codec API */ +/* */ +/* */ +/* Inputs : argc - Number of arguments */ +/* argv[] - Arguments */ +/* Globals : */ +/* Processing : Shows how to use create, process, control and delete */ +/* */ +/* Outputs : Codec output in a file */ +/* Returns : */ +/* */ +/* Issues : Assumes both PROFILE_ENABLE to be */ +/* defined for multithread decode-display working */ +/* */ +/* Revision History: */ +/* */ +/* DD MM YYYY Author(s) Changes */ +/* 07 09 2012 100189 Initial Version */ +/* 09 05 2013 100578 Multithread decode-display */ +/*****************************************************************************/ +#ifdef IOS +int vdec_main(char *homedir, char *documentdir, int screen_wd, int screen_ht) +#else +int main(WORD32 argc, CHAR *argv[]) +#endif +{ + CHAR ac_cfg_fname[STRLENGTH]; + FILE *fp_cfg_file = NULL; + FILE *ps_piclen_file = NULL; + FILE *ps_ip_file = NULL; + FILE *ps_op_file = NULL; + FILE *ps_op_chksum_file = NULL; + WORD32 ret; + CHAR ac_error_str[STRLENGTH]; + vid_dec_ctx_t s_app_ctx; + UWORD8 *pu1_bs_buf; + + ivd_out_bufdesc_t *ps_out_buf; + UWORD32 u4_num_bytes_dec = 0; + UWORD32 file_pos = 0; + IV_API_CALL_STATUS_T e_dec_status; + UWORD32 u4_ip_frm_ts = 0, u4_op_frm_ts = 0; + + WORD32 u4_bytes_remaining = 0; + void *pv_mem_rec_location; + UWORD32 u4_num_mem_recs; + UWORD32 i; + UWORD32 u4_ip_buf_len; + UWORD32 frm_cnt = 0; + WORD32 total_bytes_comsumed; + +#ifdef PROFILE_ENABLE + UWORD32 u4_tot_cycles = 0; + UWORD32 u4_tot_fmt_cycles = 0; + UWORD32 peak_window[PEAK_WINDOW_SIZE]; + UWORD32 peak_window_idx = 0; + UWORD32 peak_avg_max = 0; +#ifdef INTEL_CE5300 + UWORD32 time_consumed = 0; + UWORD32 bytes_consumed = 0; +#endif +#endif 
+#ifdef WINDOWS_TIMER + TIMER frequency; +#endif + WORD32 width = 0, height = 0; + iv_obj_t *codec_obj; +#if defined(GPU_BUILD) && !defined(X86) +// int ioctl_init(); +// ioctl_init(); +#endif + +#ifdef X86_MINGW + //For getting printfs without any delay + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); +#endif +#ifdef IOS + sprintf(filename_trace, "%s/iostrace.txt", homedir); + printf("\ntrace file name = %s", filename_trace); +#endif + +#ifdef X86_MINGW + { + signal(SIGSEGV, sigsegv_handler); + } +#endif + + +#ifndef IOS + /* Usage */ + if(argc < 2) + { + printf("Using test.cfg as configuration file \n"); + strcpy(ac_cfg_fname, "test.cfg"); + } + else if(argc == 2) + { + strcpy(ac_cfg_fname, argv[1]); + } + +#else + strcpy(ac_cfg_fname, "test.cfg"); + +#endif + + + /***********************************************************************/ + /* Initialize Application parameters */ + /***********************************************************************/ + + strcpy(s_app_ctx.ac_ip_fname, "\0"); + s_app_ctx.dump_q_wr_idx = 0; + s_app_ctx.dump_q_rd_idx = 0; + s_app_ctx.display_thread_created = 0; + s_app_ctx.disp_q_wr_idx = 0; + s_app_ctx.disp_q_rd_idx = 0; + s_app_ctx.disp_delay = 0; + s_app_ctx.loopback = 0; + s_app_ctx.display = 0; + s_app_ctx.full_screen = 0; + s_app_ctx.u4_piclen_flag = 0; + s_app_ctx.fps = DEFAULT_FPS; + file_pos = 0; + total_bytes_comsumed = 0; + u4_ip_frm_ts = 0; + u4_op_frm_ts = 0; +#ifdef PROFILE_ENABLE + memset(peak_window, 0, sizeof(WORD32) * PEAK_WINDOW_SIZE); +#endif + s_app_ctx.share_disp_buf = DEFAULT_SHARE_DISPLAY_BUF; + s_app_ctx.u4_num_cores = DEFAULT_NUM_CORES; + s_app_ctx.i4_degrade_type = 0; + s_app_ctx.i4_degrade_pics = 0; + s_app_ctx.max_wd = 0; + s_app_ctx.max_ht = 0; + s_app_ctx.max_level = 0; + s_app_ctx.e_arch = ARCH_ARM_A9Q; + s_app_ctx.e_soc = SOC_GENERIC; + + s_app_ctx.u4_strd = STRIDE; + + s_app_ctx.display_thread_handle = malloc(ithread_get_handle_size()); + s_app_ctx.quit = 0; + 
s_app_ctx.paused = 0; + //s_app_ctx.u4_output_present = 0; + + s_app_ctx.get_stride = &default_get_stride; + + s_app_ctx.get_color_fmt = &default_get_color_fmt; + + /* Set function pointers for display */ +#ifdef SDL_DISPLAY + s_app_ctx.disp_init = &sdl_disp_init; + s_app_ctx.alloc_disp_buffers = &sdl_alloc_disp_buffers; + s_app_ctx.display_buffer = &sdl_display; + s_app_ctx.set_disp_buffers = &sdl_set_disp_buffers; + s_app_ctx.disp_deinit = &sdl_disp_deinit; + s_app_ctx.disp_usleep = &sdl_disp_usleep; + s_app_ctx.get_color_fmt = &sdl_get_color_fmt; + s_app_ctx.get_stride = &sdl_get_stride; +#endif + +#ifdef FBDEV_DISPLAY + s_app_ctx.disp_init = &fbd_disp_init; + s_app_ctx.alloc_disp_buffers = &fbd_alloc_disp_buffers; + s_app_ctx.display_buffer = &fbd_display; + s_app_ctx.set_disp_buffers = &fbd_set_disp_buffers; + s_app_ctx.disp_deinit = &fbd_disp_deinit; + s_app_ctx.disp_usleep = &fbd_disp_usleep; + s_app_ctx.get_color_fmt = &fbd_get_color_fmt; + s_app_ctx.get_stride = &fbd_get_stride; +#endif + +#ifdef INTEL_CE5300 + s_app_ctx.disp_init = &gdl_disp_init; + s_app_ctx.alloc_disp_buffers = &gdl_alloc_disp_buffers; + s_app_ctx.display_buffer = &gdl_display; + s_app_ctx.set_disp_buffers = &gdl_set_disp_buffers; + s_app_ctx.disp_deinit = &gdl_disp_deinit; + s_app_ctx.disp_usleep = &gdl_disp_usleep; + s_app_ctx.get_color_fmt = &gdl_get_color_fmt; + s_app_ctx.get_stride = &gdl_get_stride; +#endif + +#ifdef IOS_DISPLAY + s_app_ctx.disp_init = &ios_disp_init; + s_app_ctx.alloc_disp_buffers = &ios_alloc_disp_buffers; + s_app_ctx.display_buffer = &ios_display; + s_app_ctx.set_disp_buffers = &ios_set_disp_buffers; + s_app_ctx.disp_deinit = &ios_disp_deinit; + s_app_ctx.disp_usleep = &ios_disp_usleep; + s_app_ctx.get_color_fmt = &ios_get_color_fmt; + s_app_ctx.get_stride = &ios_get_stride; +#endif + + s_app_ctx.display_deinit_flag = 0; + s_app_ctx.e_output_chroma_format = IV_YUV_420SP_UV; + /*************************************************************************/ + /* Parse 
arguments */ + /*************************************************************************/ + +#ifndef IOS + /* Read command line arguments */ + if(argc > 2) + { + for(i = 1; i < (UWORD32)argc; i += 2) + { + if(CONFIG == get_argument(argv[i])) + { + strcpy(ac_cfg_fname, argv[i + 1]); + if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + } + else + { + parse_argument(&s_app_ctx, argv[i], argv[i + 1]); + } + } + } + else + { + if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + } +#else + sprintf(filename_with_path, "%s/%s", homedir, ac_cfg_fname); + if((fp_cfg_file = fopen(filename_with_path, "r")) == NULL) + { + sprintf(ac_error_str, "Could not open Configuration file %s", + ac_cfg_fname); + codec_exit(ac_error_str); + + } + read_cfg_file(&s_app_ctx, fp_cfg_file); + fclose(fp_cfg_file); + +#endif +#ifdef PRINT_PICSIZE + /* If the binary is used for only getting number of bytes in each picture, then disable the following features */ + s_app_ctx.u4_piclen_flag = 0; + s_app_ctx.u4_file_save_flag = 0; + s_app_ctx.u4_chksum_save_flag = 0; + s_app_ctx.i4_degrade_pics = 0; + s_app_ctx.i4_degrade_type = 0; + s_app_ctx.loopback = 0; + s_app_ctx.share_disp_buf = 0; + s_app_ctx.display = 0; +#endif + + /* If display is enabled, then turn off shared mode and get color format that is supported by display */ + if(1 == s_app_ctx.display) + { + s_app_ctx.share_disp_buf = 0; + s_app_ctx.e_output_chroma_format = s_app_ctx.get_color_fmt(); + } + if(strcmp(s_app_ctx.ac_ip_fname, "\0") == 0) + { + printf("\nNo input file given for decoding\n"); + exit(-1); + } + + + 
/***********************************************************************/ + /* create the file object for input file */ + /***********************************************************************/ +#ifdef IOS + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_ip_fname); + ps_ip_file = fopen(filename_with_path, "rb"); +#else + ps_ip_file = fopen(s_app_ctx.ac_ip_fname, "rb"); +#endif + if(NULL == ps_ip_file) + { + sprintf(ac_error_str, "Could not open input file %s", + s_app_ctx.ac_ip_fname); + codec_exit(ac_error_str); + } + /***********************************************************************/ + /* create the file object for input file */ + /***********************************************************************/ + if(1 == s_app_ctx.u4_piclen_flag) + { +#ifdef IOS + sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_piclen_fname); + ps_piclen_file = fopen(filename_with_path, "rb"); +#else + ps_piclen_file = fopen(s_app_ctx.ac_piclen_fname, "rb"); +#endif + if(NULL == ps_piclen_file) + { + sprintf(ac_error_str, "Could not open piclen file %s", + s_app_ctx.ac_piclen_fname); + codec_exit(ac_error_str); + } + } + + /***********************************************************************/ + /* create the file object for output file */ + /***********************************************************************/ + if(1 == s_app_ctx.u4_file_save_flag) + { +#ifdef IOS + sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_fname); + ps_op_file = fopen(filename_with_path, "wb"); +#else + ps_op_file = fopen(s_app_ctx.ac_op_fname, "wb"); +#endif + + if(NULL == ps_op_file) + { + sprintf(ac_error_str, "Could not open output file %s", + s_app_ctx.ac_op_fname); + codec_exit(ac_error_str); + } + } + + /***********************************************************************/ + /* create the file object for check sum file */ + /***********************************************************************/ + if(1 == s_app_ctx.u4_chksum_save_flag) + { +#if IOS 
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_chksum_fname); + ps_op_chksum_file = fopen(filename_with_path, "wb"); +#else + ps_op_chksum_file = fopen(s_app_ctx.ac_op_chksum_fname, "wb"); +#endif + if(NULL == ps_op_chksum_file) + { + sprintf(ac_error_str, "Could not open check sum file %s", + s_app_ctx.ac_op_chksum_fname); + codec_exit(ac_error_str); + } + } + /***********************************************************************/ + /* Create decoder instance */ + /***********************************************************************/ + { + + ps_out_buf = (ivd_out_bufdesc_t *)malloc(sizeof(ivd_out_bufdesc_t)); + + { + iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip; + iv_num_mem_rec_op_t s_no_of_mem_rec_query_op; + + s_no_of_mem_rec_query_ip.u4_size = sizeof(s_no_of_mem_rec_query_ip); + s_no_of_mem_rec_query_op.u4_size = sizeof(s_no_of_mem_rec_query_op); + s_no_of_mem_rec_query_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC; + + /*****************************************************************************/ + /* API Call: Get Number of Mem Records */ + /*****************************************************************************/ + e_dec_status = ivd_api_function( + NULL, (void *)&s_no_of_mem_rec_query_ip, + (void *)&s_no_of_mem_rec_query_op); + if(IV_SUCCESS != e_dec_status) + { + sprintf(ac_error_str, "Error in get mem records"); + codec_exit(ac_error_str); + } + + u4_num_mem_recs = s_no_of_mem_rec_query_op.u4_num_mem_rec; + } + + pv_mem_rec_location = malloc(u4_num_mem_recs * sizeof(iv_mem_rec_t)); + if(pv_mem_rec_location == NULL) + { + sprintf(ac_error_str, "Allocation failure for mem_rec_location"); + codec_exit(ac_error_str); + + } + + { + impeg2d_fill_mem_rec_ip_t s_fill_mem_rec_ip; + impeg2d_fill_mem_rec_op_t s_fill_mem_rec_op; + iv_mem_rec_t *ps_mem_rec; + UWORD32 total_size; + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd = + IV_CMD_FILL_NUM_MEM_REC; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location = + (iv_mem_rec_t 
*)pv_mem_rec_location; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd = + (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd; + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht = + (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; + s_fill_mem_rec_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf; + s_fill_mem_rec_ip.e_output_format = + (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; + + s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size = + sizeof(impeg2d_fill_mem_rec_ip_t); + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size = + sizeof(impeg2d_fill_mem_rec_op_t); + + ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location; + for(i = 0; i < u4_num_mem_recs; i++) + ps_mem_rec[i].u4_size = sizeof(iv_mem_rec_t); + + /*****************************************************************************/ + /* API Call: Fill Mem Records */ + /*****************************************************************************/ + + e_dec_status = ivd_api_function(NULL, + (void *)&s_fill_mem_rec_ip, + (void *)&s_fill_mem_rec_op); + + u4_num_mem_recs = + s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled; + + if(IV_SUCCESS != e_dec_status) + { + sprintf(ac_error_str, "Error in fill mem records: %x", s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code); + codec_exit(ac_error_str); + } + + ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location; + total_size = 0; + for(i = 0; i < u4_num_mem_recs; i++) + { + ps_mem_rec->pv_base = app_aligned_malloc(ps_mem_rec->u4_mem_alignment, + ps_mem_rec->u4_mem_size); + if(ps_mem_rec->pv_base == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for mem record id %d size %d\n", + i, ps_mem_rec->u4_mem_size); + codec_exit(ac_error_str); + + } + total_size += ps_mem_rec->u4_mem_size; + + ps_mem_rec++; + } + //printf("\nTotal memory for codec %d\n", total_size); + } + /*****************************************************************************/ + /* API Call: Initialize the Decoder */ + 
/*****************************************************************************/ + { + impeg2d_init_ip_t s_init_ip; + impeg2d_init_op_t s_init_op; + void *fxns = &ivd_api_function; + iv_mem_rec_t *mem_tab; + + mem_tab = (iv_mem_rec_t *)pv_mem_rec_location; + s_init_ip.s_ivd_init_ip_t.e_cmd = (IVD_API_COMMAND_TYPE_T)IV_CMD_INIT; + s_init_ip.s_ivd_init_ip_t.pv_mem_rec_location = mem_tab; + s_init_ip.s_ivd_init_ip_t.u4_frm_max_wd = (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd; + s_init_ip.s_ivd_init_ip_t.u4_frm_max_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; + + s_init_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf; + + s_init_ip.s_ivd_init_ip_t.u4_num_mem_rec = u4_num_mem_recs; + s_init_ip.s_ivd_init_ip_t.e_output_format = + (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; + s_init_ip.s_ivd_init_ip_t.u4_size = sizeof(impeg2d_init_ip_t); + s_init_op.s_ivd_init_op_t.u4_size = sizeof(impeg2d_init_op_t); + + codec_obj = (iv_obj_t *)mem_tab[0].pv_base; + codec_obj->pv_fxns = fxns; + codec_obj->u4_size = sizeof(iv_obj_t); + + s_app_ctx.cocodec_obj = codec_obj; + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_init_ip, + (void *)&s_init_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Init %8x\n", + s_init_op.s_ivd_init_op_t.u4_error_code); + codec_exit(ac_error_str); + } + + /*****************************************************************************/ + /* Input and output buffer allocation */ + /*****************************************************************************/ + { + + ivd_ctl_getbufinfo_ip_t s_ctl_ip; + ivd_ctl_getbufinfo_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Get Buf 
Info %x", s_ctl_op.u4_error_code); + codec_exit(ac_error_str); + } + + /* Allocate input buffer */ + u4_ip_buf_len = s_ctl_op.u4_min_in_buf_size[0]; + pu1_bs_buf = (UWORD8 *)malloc(u4_ip_buf_len); + + if(pu1_bs_buf == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for input buffer of size %d", + u4_ip_buf_len); + codec_exit(ac_error_str); + } + s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs; + /* Allocate output buffer only if display buffers are not shared */ + /* Or if shared and output is 420P */ + if((0 == s_app_ctx.share_disp_buf) || (IV_YUV_420P == s_app_ctx.e_output_chroma_format)) + { + UWORD32 outlen; + ps_out_buf->u4_min_out_buf_size[0] = + s_ctl_op.u4_min_out_buf_size[0]; + ps_out_buf->u4_min_out_buf_size[1] = + s_ctl_op.u4_min_out_buf_size[1]; + ps_out_buf->u4_min_out_buf_size[2] = + s_ctl_op.u4_min_out_buf_size[2]; + + outlen = s_ctl_op.u4_min_out_buf_size[0]; + if(s_ctl_op.u4_min_num_out_bufs > 1) + outlen += s_ctl_op.u4_min_out_buf_size[1]; + + if(s_ctl_op.u4_min_num_out_bufs > 2) + outlen += s_ctl_op.u4_min_out_buf_size[2]; + + ps_out_buf->pu1_bufs[0] = (UWORD8 *)malloc(outlen); + if(ps_out_buf->pu1_bufs[0] == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for output buffer of size %d", + outlen); + codec_exit(ac_error_str); + } + + if(s_ctl_op.u4_min_num_out_bufs > 1) + ps_out_buf->pu1_bufs[1] = ps_out_buf->pu1_bufs[0] + + (s_ctl_op.u4_min_out_buf_size[0]); + + if(s_ctl_op.u4_min_num_out_bufs > 2) + ps_out_buf->pu1_bufs[2] = ps_out_buf->pu1_bufs[1] + + (s_ctl_op.u4_min_out_buf_size[1]); + + ps_out_buf->u4_num_bufs = s_ctl_op.u4_min_num_out_bufs; + } + + } + } + + } + + + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ + { + + impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + 
s_ctl_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores; + s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip, + (void *)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } + /*************************************************************************/ + /* set processsor */ + /*************************************************************************/ + + { + + impeg2d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip; + impeg2d_ctl_set_processor_op_t s_ctl_set_num_processor_op; + + s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_num_processor_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_PROCESSOR; + s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch; + s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc; + s_ctl_set_num_processor_ip.u4_size = sizeof(impeg2d_ctl_set_processor_ip_t); + s_ctl_set_num_processor_op.u4_size = sizeof(impeg2d_ctl_set_processor_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_num_processor_ip, + (void *)&s_ctl_set_num_processor_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting Processor type"); + codec_exit(ac_error_str); + } + + } + + + /*****************************************************************************/ + /* Decode header to get width and height and buffer sizes */ + /*****************************************************************************/ + { + + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; + + s_ctl_ip.u4_disp_wd = STRIDE; + if(1 == s_app_ctx.display) + 
s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride(); + + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_HEADER; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, + "\nError in setting the codec in header decode mode"); + codec_exit(ac_error_str); + } + + do + { + WORD32 numbytes; + if(0 == s_app_ctx.u4_piclen_flag) + { + fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), numbytes, + ps_ip_file); + + if(0 == u4_bytes_remaining) + { + sprintf(ac_error_str, "\nUnable to read from input file"); + codec_exit(ac_error_str); + } + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_video_decode_ip.u4_ts = u4_ip_frm_ts; + s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /*****************************************************************************/ + /* API Call: Header Decode */ + /*****************************************************************************/ + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in header decode %x", + s_video_decode_op.u4_error_code); + // codec_exit(ac_error_str); + } + + u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed; +#ifndef PROFILE_ENABLE + printf("%d\n", 
s_video_decode_op.u4_num_bytes_consumed); +#endif + file_pos += u4_num_bytes_dec; + total_bytes_comsumed += u4_num_bytes_dec; + }while(ret != IV_SUCCESS); + + /* copy pic_wd and pic_ht to initialize buffers */ + s_app_ctx.u4_pic_wd = s_video_decode_op.u4_pic_wd; + s_app_ctx.u4_pic_ht = s_video_decode_op.u4_pic_ht; + +#if IOS_DISPLAY + s_app_ctx.i4_screen_wd = screen_wd; + s_app_ctx.i4_screen_ht = screen_ht; +#endif + + /* Create display thread and wait for the display buffers to be initialized */ + if(1 == s_app_ctx.display) + { + if(0 == s_app_ctx.display_thread_created) + { + s_app_ctx.display_init_done = 0; + ithread_create(s_app_ctx.display_thread_handle, NULL, + (void *)&display_thread, (void *)&s_app_ctx); + s_app_ctx.display_thread_created = 1; + + while(1) + { + if(s_app_ctx.display_init_done) + break; + + ithread_msleep(1); + } + } + + s_app_ctx.u4_strd = s_app_ctx.get_stride(); + } + } + + /*************************************************************************/ + /* Get actual number of output buffers requried, which is dependent */ + /* on stream properties such as width, height and level etc */ + /* This is needed mainly for shared display mode */ + /*************************************************************************/ + //if(1 == s_app_ctx.share_disp_buf) + { + ivd_ctl_getbufinfo_ip_t s_ctl_ip; + ivd_ctl_getbufinfo_op_t s_ctl_op; + WORD32 outlen = 0; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code); + codec_exit(ac_error_str); + } + +#ifdef APP_EXTRA_BUFS + s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS; + s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS; +#endif + + 
/*****************************************************************************/ + /* API Call: Allocate display buffers for display buffer shared case */ + /*****************************************************************************/ + + for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] = + s_ctl_op.u4_min_out_buf_size[0]; + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] = + s_ctl_op.u4_min_out_buf_size[1]; + s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] = + s_ctl_op.u4_min_out_buf_size[2]; + + outlen = s_ctl_op.u4_min_out_buf_size[0]; + if(s_ctl_op.u4_min_num_out_bufs > 1) + outlen += s_ctl_op.u4_min_out_buf_size[1]; + + if(s_ctl_op.u4_min_num_out_bufs > 2) + outlen += s_ctl_op.u4_min_out_buf_size[2]; + + s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen); + + if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL) + { + sprintf(ac_error_str, + "\nAllocation failure for output buffer of size %d", + outlen); + codec_exit(ac_error_str); + } + + if(s_ctl_op.u4_min_num_out_bufs > 1) + s_app_ctx.s_disp_buffers[i].pu1_bufs[1] = + s_app_ctx.s_disp_buffers[i].pu1_bufs[0] + + (s_ctl_op.u4_min_out_buf_size[0]); + + if(s_ctl_op.u4_min_num_out_bufs > 2) + s_app_ctx.s_disp_buffers[i].pu1_bufs[2] = + s_app_ctx.s_disp_buffers[i].pu1_bufs[1] + + (s_ctl_op.u4_min_out_buf_size[1]); + + s_app_ctx.s_disp_buffers[i].u4_num_bufs = + s_ctl_op.u4_min_num_out_bufs; + } + s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs; + + /*****************************************************************************/ + /* API Call: Send the allocated display buffers to codec */ + /*****************************************************************************/ + if(1 == s_app_ctx.share_disp_buf) + { + ivd_set_display_frame_ip_t s_set_display_frame_ip; + ivd_set_display_frame_op_t s_set_display_frame_op; + + s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; + s_set_display_frame_ip.u4_size = 
sizeof(ivd_set_display_frame_ip_t); + s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); + + s_set_display_frame_ip.num_disp_bufs = s_app_ctx.num_disp_buf; + + memcpy(&(s_set_display_frame_ip.s_disp_buffer), + &(s_app_ctx.s_disp_buffers), + s_ctl_op.u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t)); + + ret = ivd_api_function((iv_obj_t *)codec_obj, + (void *)&s_set_display_frame_ip, + (void *)&s_set_display_frame_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Set display frame"); + codec_exit(ac_error_str); + } + } + + + } + + /*************************************************************************/ + /* Get frame dimensions for display buffers such as x_offset,y_offset */ + /* etc. This information might be needed to set display buffer */ + /* offsets in case of shared display buffer mode */ + /*************************************************************************/ + { + + impeg2d_ctl_get_frame_dimensions_ip_t s_ctl_get_frame_dimensions_ip; + impeg2d_ctl_get_frame_dimensions_op_t s_ctl_get_frame_dimensions_op; + + s_ctl_get_frame_dimensions_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_get_frame_dimensions_ip.e_sub_cmd = + (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS; + s_ctl_get_frame_dimensions_ip.u4_size = + sizeof(impeg2d_ctl_get_frame_dimensions_ip_t); + s_ctl_get_frame_dimensions_op.u4_size = + sizeof(impeg2d_ctl_get_frame_dimensions_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_frame_dimensions_ip, + (void *)&s_ctl_get_frame_dimensions_op); + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Get buffer Dimensions"); + codec_exit(ac_error_str); + } + +/* + printf("Frame offsets due to padding\n"); + printf("s_ctl_get_frame_dimensions_op.x_offset[0] %d s_ctl_get_frame_dimensions_op.y_offset[0] %d\n", + s_ctl_get_frame_dimensions_op.u4_x_offset[0], + s_ctl_get_frame_dimensions_op.u4_y_offset[0]); +*/ + } + 
/*************************************************************************/ + /* Get VUI parameters */ + /*************************************************************************/ +#if 0 + { + + impeg2d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip; + impeg2d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op; + + s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_get_vui_params_ip.e_sub_cmd = + (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_GET_VUI_PARAMS; + s_ctl_get_vui_params_ip.u4_size = + sizeof(impeg2d_ctl_get_vui_params_ip_t); + s_ctl_get_vui_params_op.u4_size = + sizeof(impeg2d_ctl_get_vui_params_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_vui_params_ip, + (void *)&s_ctl_get_vui_params_op); + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Get VUI params"); + //codec_exit(ac_error_str); + } + + } +#endif + + /*************************************************************************/ + /* Set the decoder in frame decode mode. It was set in header decode */ + /* mode earlier */ + /*************************************************************************/ + { + + ivd_ctl_set_config_ip_t s_ctl_ip; + ivd_ctl_set_config_op_t s_ctl_op; + + s_ctl_ip.u4_disp_wd = STRIDE; + if(1 == s_app_ctx.display) + s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride(); + s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE; + + s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; + s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME; + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t); + + s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, (void *)&s_ctl_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Set Parameters"); + //codec_exit(ac_error_str); + } + + } + /*************************************************************************/ + /* If required disable deblocking and sao at given 
level */ + /*************************************************************************/ + set_degrade(codec_obj, s_app_ctx.i4_degrade_type, s_app_ctx.i4_degrade_pics); +#ifdef WINDOWS_TIMER + QueryPerformanceFrequency(&frequency); +#endif +#ifndef PRINT_PICSIZE + get_version(codec_obj); +#endif + while(u4_op_frm_ts < (s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay)) + { + +#ifdef TEST_FLUSH + if(u4_ip_frm_ts == FLUSH_FRM_CNT) + { + ivd_ctl_flush_ip_t s_ctl_ip; + ivd_ctl_flush_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t); + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + + if(ret != IV_SUCCESS) + { + printf("Error in Setting the decoder in flush mode\n"); + } + file_pos = 0; + + fseek(ps_ip_file, file_pos, SEEK_SET); + + } +#endif + if(u4_ip_frm_ts < s_app_ctx.num_disp_buf && (1 == s_app_ctx.share_disp_buf)) + { + release_disp_frame(codec_obj, u4_ip_frm_ts); + } + + + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ +#ifdef DYNAMIC_NUMCORES + { + + impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_cores_ip.e_sub_cmd = IMPEG2D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = 1 + 3 * (u4_ip_frm_ts % 2); + s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip, + (void *)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } +#endif + 
/***********************************************************************/ + /* Seek the file to start of current frame, this is equavelent of */ + /* having a parcer which tells the start of current frame */ + /***********************************************************************/ + { + WORD32 numbytes; + + if(0 == s_app_ctx.u4_piclen_flag) + { + fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), + numbytes, ps_ip_file); + + if(u4_bytes_remaining == 0) + { + if(1 == s_app_ctx.loopback) + { + file_pos = 0; + if(0 == s_app_ctx.u4_piclen_flag) + { + fseek(ps_ip_file, file_pos, SEEK_SET); + numbytes = u4_ip_buf_len; + } + else + { + WORD32 entries; + entries = fscanf(ps_piclen_file, "%d\n", &numbytes); + if(1 != entries) + numbytes = u4_ip_buf_len; + } + + + u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), + numbytes, ps_ip_file); + } + else + break; + } + } + + /*********************************************************************/ + /* Following calls can be enabled at diffent times */ + /*********************************************************************/ +#if ENABLE_DEGRADE + if(u4_op_frm_ts >= 10000) + disable_deblocking(codec_obj, 4); + + if(u4_op_frm_ts == 30000) + enable_deblocking(codec_obj); + + if(u4_op_frm_ts == 10000) + enable_skippb_frames(codec_obj); + + if(u4_op_frm_ts == 60000) + disable_skippb_frames(codec_obj); + + if(u4_op_frm_ts == 30000) + enable_skipb_frames(codec_obj); + + if(u4_op_frm_ts == 60000) + disable_skipb_frames(codec_obj); +#endif + + + { + ivd_video_decode_ip_t s_video_decode_ip; + ivd_video_decode_op_t s_video_decode_op; +#ifdef PROFILE_ENABLE + UWORD32 s_elapsed_time; + TIMER s_start_timer; + TIMER s_end_timer; +#endif + + + s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_video_decode_ip.u4_ts = u4_ip_frm_ts; 
+ s_video_decode_ip.pv_stream_buffer = pu1_bs_buf; + s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining; + s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] = + ps_out_buf->u4_min_out_buf_size[0]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] = + ps_out_buf->u4_min_out_buf_size[1]; + s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] = + ps_out_buf->u4_min_out_buf_size[2]; + + s_video_decode_ip.s_out_buffer.pu1_bufs[0] = + ps_out_buf->pu1_bufs[0]; + s_video_decode_ip.s_out_buffer.pu1_bufs[1] = + ps_out_buf->pu1_bufs[1]; + s_video_decode_ip.s_out_buffer.pu1_bufs[2] = + ps_out_buf->pu1_bufs[2]; + s_video_decode_ip.s_out_buffer.u4_num_bufs = + ps_out_buf->u4_num_bufs; + s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t); + + /* Get display buffer pointers */ + if(1 == s_app_ctx.display) + { + WORD32 wr_idx; + + wr_idx = dispq_producer_dequeue(&s_app_ctx); + + if(s_app_ctx.quit) + break; + + s_app_ctx.set_disp_buffers(s_app_ctx.pv_disp_ctx, wr_idx, + &s_video_decode_ip.s_out_buffer.pu1_bufs[0], + &s_video_decode_ip.s_out_buffer.pu1_bufs[1], + &s_video_decode_ip.s_out_buffer.pu1_bufs[2]); + } + + /*****************************************************************************/ + /* API Call: Video Decode */ + /*****************************************************************************/ + + GETTIME(&s_start_timer); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip, + (void *)&s_video_decode_op); + + + GETTIME(&s_end_timer); + ELAPSEDTIME(s_start_timer, s_end_timer, s_elapsed_time, frequency); +#ifdef PROFILE_ENABLE + { + UWORD32 peak_avg, id; + u4_tot_cycles += s_elapsed_time; + peak_window[peak_window_idx++] = s_elapsed_time; + if(peak_window_idx == PEAK_WINDOW_SIZE) + peak_window_idx = 0; + peak_avg = 0; + for(id = 0; id < PEAK_WINDOW_SIZE; id++) + { + peak_avg += peak_window[id]; + } + peak_avg /= PEAK_WINDOW_SIZE; + if(peak_avg > peak_avg_max) + 
peak_avg_max = peak_avg; + frm_cnt++; + + printf("FrameNum: %4d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d Output: %2d NumBytes: %6d \n", + frm_cnt, s_elapsed_time, u4_tot_cycles / frm_cnt, peak_avg_max, s_video_decode_op.u4_output_present, s_video_decode_op.u4_num_bytes_consumed); + + } +#ifdef INTEL_CE5300 + time_consumed += s_elapsed_time; + bytes_consumed += s_video_decode_op.u4_num_bytes_consumed; + if(!(frm_cnt % (s_app_ctx.fps))) + { + time_consumed = time_consumed / s_app_ctx.fps; + printf("Average decode time(micro sec) for the last second = %6d\n", time_consumed); + printf("Average bitrate(kb) for the last second = %6d\n", (bytes_consumed * 8) / 1024); + time_consumed = 0; + bytes_consumed = 0; + + } +#endif +#else + printf("%d\n", s_video_decode_op.u4_num_bytes_consumed); +#endif + + if(IV_SUCCESS != ret) + { + printf("Error in video Frame decode : ret %x Error %x\n", ret, + s_video_decode_op.u4_error_code); + if ((s_video_decode_op.u4_error_code & 0xFF) == IVD_RES_CHANGED) + { + ivd_ctl_reset_ip_t s_ctl_ip; + ivd_ctl_reset_op_t s_ctl_op; + + flush_output(codec_obj, &s_app_ctx, ps_out_buf, + pu1_bs_buf, &u4_op_frm_ts, + ps_op_file, ps_op_chksum_file, + u4_ip_frm_ts, u4_bytes_remaining); + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; + s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, + (void *)&s_ctl_op); + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Reset"); + codec_exit(ac_error_str); + } + /*************************************************************************/ + /* set num of cores */ + /*************************************************************************/ + { + + impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip; + impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op; + + s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_cores_ip.e_sub_cmd = 
(IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_NUM_CORES; + s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores; + s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t); + s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip, + (void *)&s_ctl_set_cores_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting number of cores"); + codec_exit(ac_error_str); + } + + } + /*************************************************************************/ + /* set processsor */ + /*************************************************************************/ + + { + + impeg2d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip; + impeg2d_ctl_set_processor_op_t s_ctl_set_num_processor_op; + + s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_set_num_processor_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_PROCESSOR; + s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch; + s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc; + s_ctl_set_num_processor_ip.u4_size = sizeof(impeg2d_ctl_set_processor_ip_t); + s_ctl_set_num_processor_op.u4_size = sizeof(impeg2d_ctl_set_processor_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_num_processor_ip, + (void *)&s_ctl_set_num_processor_op); + if(ret != IV_SUCCESS) + { + sprintf(ac_error_str, "\nError in setting Processor type"); + codec_exit(ac_error_str); + } + + } + + } + else if(IMPEG2D_UNSUPPORTED_DIMENSIONS + == (IMPEG2D_ERROR_CODES_T)s_video_decode_op.u4_error_code) + { + flush_output(codec_obj, &s_app_ctx, ps_out_buf, + pu1_bs_buf, &u4_op_frm_ts, + ps_op_file, ps_op_chksum_file, + u4_ip_frm_ts, u4_bytes_remaining); + + printf("Reinit codec with width %d and height %d\n", + s_video_decode_op.u4_pic_wd, + s_video_decode_op.u4_pic_ht); + + break; + } + } + + if((1 == s_app_ctx.display) && + (1 == s_video_decode_op.u4_output_present)) + { + 
dispq_producer_queue(&s_app_ctx); + } + + if(IV_B_FRAME == s_video_decode_op.e_pic_type) + s_app_ctx.b_pic_present |= 1; + + u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed; + + file_pos += u4_num_bytes_dec; + total_bytes_comsumed += u4_num_bytes_dec; + u4_ip_frm_ts++; + + + if(1 == s_video_decode_op.u4_output_present) + { + width = s_video_decode_op.s_disp_frm_buf.u4_y_wd; + height = s_video_decode_op.s_disp_frm_buf.u4_y_ht; + dump_output(&s_app_ctx, &(s_video_decode_op.s_disp_frm_buf), + s_video_decode_op.u4_disp_buf_id, ps_op_file, + ps_op_chksum_file, + u4_op_frm_ts, s_app_ctx.u4_file_save_flag, + s_app_ctx.u4_chksum_save_flag); + + u4_op_frm_ts++; + } + else + { + if((s_video_decode_op.u4_error_code >> IVD_FATALERROR) & 1) + { + printf("Fatal error\n"); + break; + } + } + + } + } + + /***********************************************************************/ + /* To get the last decoded frames, call process with NULL input */ + /***********************************************************************/ + flush_output(codec_obj, &s_app_ctx, ps_out_buf, + pu1_bs_buf, &u4_op_frm_ts, + ps_op_file, ps_op_chksum_file, + u4_ip_frm_ts, u4_bytes_remaining); + + /* set disp_end flag */ + s_app_ctx.quit = 1; + + +#ifdef PROFILE_ENABLE + printf("Summary\n"); + printf("Input filename : %s\n", s_app_ctx.ac_ip_fname); + printf("Output Width : %-4d\n", width); + printf("Output Height : %-4d\n", height); + + if(frm_cnt) + { + double avg = u4_tot_cycles / frm_cnt; + double bytes_avg = total_bytes_comsumed / frm_cnt; + double bitrate = (bytes_avg * 8 * s_app_ctx.fps) / 1000000; + printf("Bitrate @ %2d fps(mbps) : %-6.2f\n", s_app_ctx.fps, bitrate); + printf("Average decode time(micro sec) : %-6d\n", (WORD32)avg); + printf("Avg Peak decode time(%2d frames) : %-6d\n", PEAK_WINDOW_SIZE, (WORD32)peak_avg_max); + avg = (u4_tot_cycles + u4_tot_fmt_cycles) * 1.0 / frm_cnt; + + if(0 == s_app_ctx.share_disp_buf) + printf("FPS achieved (with format conv) : %-3.2f\n", 1000000 / 
avg); + else + printf("FPS achieved : %-3.2f\n", 1000000 / avg); + } +#endif + /***********************************************************************/ + /* Clear the decoder, close all the files, free all the memory */ + /***********************************************************************/ + if(1 == s_app_ctx.display) + { + s_app_ctx.display_deinit_flag = 1; + /* wait for display to finish */ + if(s_app_ctx.display_thread_created) + { + ithread_join(s_app_ctx.display_thread_handle, NULL); + } + free(s_app_ctx.display_thread_handle); + } + + { + iv_retrieve_mem_rec_ip_t s_retrieve_dec_ip; + iv_retrieve_mem_rec_op_t s_retrieve_dec_op; + s_retrieve_dec_ip.pv_mem_rec_location = (iv_mem_rec_t *)pv_mem_rec_location; + + s_retrieve_dec_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC; + s_retrieve_dec_ip.u4_size = sizeof(iv_retrieve_mem_rec_ip_t); + s_retrieve_dec_op.u4_size = sizeof(iv_retrieve_mem_rec_op_t); + + ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_retrieve_dec_ip, + (void *)&s_retrieve_dec_op); + + if(IV_SUCCESS != ret) + { + sprintf(ac_error_str, "Error in Retrieve Memrec"); + codec_exit(ac_error_str); + } + + { + iv_mem_rec_t *ps_mem_rec; + UWORD16 u2_i; + + u4_num_mem_recs = s_retrieve_dec_op.u4_num_mem_rec_filled; + + ps_mem_rec = s_retrieve_dec_ip.pv_mem_rec_location; + + for(u2_i = 0; u2_i < u4_num_mem_recs; u2_i++) + { + app_aligned_free(ps_mem_rec->pv_base); + ps_mem_rec++; + } + free(s_retrieve_dec_ip.pv_mem_rec_location); + } + + } + /***********************************************************************/ + /* Close all the files and free all the memory */ + /***********************************************************************/ + { + fclose(ps_ip_file); + + if(1 == s_app_ctx.u4_file_save_flag) + { + fclose(ps_op_file); + } + if(1 == s_app_ctx.u4_chksum_save_flag) + { + fclose(ps_op_chksum_file); + } + + } + + if(0 == s_app_ctx.share_disp_buf) + { + free(ps_out_buf->pu1_bufs[0]); + } + + for(i = 0; i < s_app_ctx.num_disp_buf; i++) + { + 
free(s_app_ctx.s_disp_buffers[i].pu1_bufs[0]); + } + + free(ps_out_buf); + free(pu1_bs_buf); + + return (0); +} |