summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--[-rwxr-xr-x]Android.mk0
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_arm_memory_barrier.s3
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_deblk_chroma_a9.s48
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_deblk_luma_a9.s24
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_default_weighted_pred_a9q.s9
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_ihadamard_scaling_a9.s14
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_chroma_a9q.s18
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_filters_luma_horz_a9q.s21
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_filters_luma_vert_a9q.s18
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_bilinear_a9q.s22
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_copy_a9q.s10
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s14
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s16
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s20
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s14
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s11
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s16
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_intra_pred_chroma_a9q.s32
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_intra_pred_luma_16x16_a9q.s42
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_intra_pred_luma_4x4_a9q.s55
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_intra_pred_luma_8x8_a9q.s59
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_iquant_itrans_recon_a9.s16
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_iquant_itrans_recon_dc_a9.s17
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_itrans_recon_a9.s8
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_mem_fns_neon.s14
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_padding_neon.s25
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_platform_macros.h0
-rwxr-xr-xcommon/arm/ih264_resi_trans_a9.s604
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_resi_trans_quant_a9.s2
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_weighted_bi_pred_a9q.s8
-rw-r--r--[-rwxr-xr-x]common/arm/ih264_weighted_pred_a9q.s8
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_deblk_chroma_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_deblk_luma_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_default_weighted_pred_av8.s1
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_ihadamard_scaling_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_chroma_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_filters_luma_horz_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_filters_luma_vert_av8.s8
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_copy_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s4
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s2
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_intra_pred_chroma_av8.s2
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_intra_pred_luma_16x16_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_intra_pred_luma_4x4_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_intra_pred_luma_8x8_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_iquant_itrans_recon_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_iquant_itrans_recon_dc_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_mem_fns_neon_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_neon_macros.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_padding_neon_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_resi_trans_quant_av8.s0
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_weighted_bi_pred_av8.s1
-rw-r--r--[-rwxr-xr-x]common/armv8/ih264_weighted_pred_av8.s1
-rw-r--r--[-rwxr-xr-x]common/ih264_buf_mgr.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_buf_mgr.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_cabac_tables.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_cabac_tables.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_cavlc_tables.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_cavlc_tables.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_chroma_intra_pred_filters.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_common_tables.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_common_tables.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_deblk_edge_filters.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_deblk_edge_filters.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_deblk_tables.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_deblk_tables.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_debug.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_defs.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_disp_mgr.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_disp_mgr.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_dpb_mgr.c2
-rw-r--r--[-rwxr-xr-x]common/ih264_dpb_mgr.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_error.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_ihadamard_scaling.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_inter_pred_filters.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_inter_pred_filters.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_intra_pred_filters.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_iquant_itrans_recon.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_itrans_recon.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_list.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_list.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_luma_intra_pred_filters.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_macros.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_mem_fns.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_mem_fns.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_padding.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_padding.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_resi_trans.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_resi_trans_quant.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_size_defs.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_structs.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_trans_data.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_trans_data.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_trans_macros.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_trans_quant_itrans_iquant.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_typedefs.h0
-rw-r--r--[-rwxr-xr-x]common/ih264_weighted_pred.c0
-rw-r--r--[-rwxr-xr-x]common/ih264_weighted_pred.h0
-rw-r--r--[-rwxr-xr-x]common/ithread.c5
-rw-r--r--[-rwxr-xr-x]common/ithread.h0
-rw-r--r--[-rwxr-xr-x]common/mips/ih264_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_chroma_intra_pred_filters_ssse3.c0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_deblk_chroma_ssse3.c0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_deblk_luma_ssse3.c2
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_ihadamard_scaling_sse42.c31
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_ihadamard_scaling_ssse3.c19
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_inter_pred_filters_ssse3.c5
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_iquant_itrans_recon_dc_ssse3.c12
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_iquant_itrans_recon_sse42.c3
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_iquant_itrans_recon_ssse3.c5
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_luma_intra_pred_filters_ssse3.c0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_mem_fns_ssse3.c0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_padding_ssse3.c0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_resi_trans_quant_sse42.c9
-rw-r--r--[-rwxr-xr-x]common/x86/ih264_weighted_pred_sse42.c0
-rw-r--r--[-rwxr-xr-x]decoder.arm.mk0
-rw-r--r--[-rwxr-xr-x]decoder.arm64.mk0
-rw-r--r--[-rwxr-xr-x]decoder.mips.mk0
-rw-r--r--[-rwxr-xr-x]decoder.mips64.mk0
-rw-r--r--[-rwxr-xr-x]decoder.mk0
-rw-r--r--[-rwxr-xr-x]decoder.x86.mk0
-rw-r--r--[-rwxr-xr-x]decoder.x86_64.mk0
-rw-r--r--[-rwxr-xr-x]decoder/arm/ih264d_function_selector.c0
-rw-r--r--[-rwxr-xr-x]decoder/arm/ih264d_function_selector_a9q.c0
-rw-r--r--[-rwxr-xr-x]decoder/arm/ih264d_function_selector_av8.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_api.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_bitstrm.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_bitstrm.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_cabac.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_cabac.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_cabac_init_tables.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_compute_bs.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_deblocking.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_deblocking.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_debug.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_debug.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_defs.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_dpb_manager.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_dpb_mgr.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_error_handler.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_format_conv.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_format_conv.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_function_selector.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_function_selector_generic.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_inter_pred.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_inter_pred.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_mb_utils.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_mb_utils.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_mem_request.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_mvpred.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_mvpred.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_nal.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_nal.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_bslice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_cabac.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_cabac.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_cavlc.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_cavlc.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_headers.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_headers.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_islice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_islice.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_mb_header.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_mb_header.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_pslice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_slice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_parse_slice.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_bslice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_bslice.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_intra_mb.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_intra_mb.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_pslice.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_process_pslice.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_quant_scaling.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_quant_scaling.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_sei.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_sei.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_structs.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_tables.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_tables.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_thread_compute_bs.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_thread_compute_bs.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_thread_parse_decode.c11
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_thread_parse_decode.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_transfer_address.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_utils.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_utils.h0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_vui.c0
-rw-r--r--[-rwxr-xr-x]decoder/ih264d_vui.h0
-rw-r--r--[-rwxr-xr-x]decoder/iv.h0
-rw-r--r--[-rwxr-xr-x]decoder/ivd.h0
-rw-r--r--[-rwxr-xr-x]decoder/mips/ih264d_function_selector.c0
-rw-r--r--[-rwxr-xr-x]decoder/x86/ih264d_function_selector.c0
-rw-r--r--[-rwxr-xr-x]decoder/x86/ih264d_function_selector_sse42.c0
-rw-r--r--[-rwxr-xr-x]decoder/x86/ih264d_function_selector_ssse3.c0
-rw-r--r--[-rwxr-xr-x]encoder.arm.mk0
-rw-r--r--[-rwxr-xr-x]encoder.arm64.mk0
-rw-r--r--[-rwxr-xr-x]encoder.mips.mk0
-rw-r--r--[-rwxr-xr-x]encoder.mips64.mk0
-rw-r--r--[-rwxr-xr-x]encoder.mk0
-rw-r--r--[-rwxr-xr-x]encoder.x86.mk0
-rw-r--r--[-rwxr-xr-x]encoder.x86_64.mk0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_fmt_conv.s0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_function_selector.c0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_function_selector_a9q.c0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_function_selector_av8.c0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_half_pel.s0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ih264e_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/arm/ime_distortion_metrics_a9q.s1350
-rw-r--r--[-rwxr-xr-x]encoder/arm/ime_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s2
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s0
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ih264e_half_pel_av8.s7
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ih264e_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ime_distortion_metrics_av8.s1
-rw-r--r--[-rwxr-xr-x]encoder/armv8/ime_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_api.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_bitstream.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_bitstream.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_cavlc.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_cavlc.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_config.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_core_coding.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_core_coding.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_deblk.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_deblk.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_debug.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_defs.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_encode.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_encode_header.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_encode_header.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_error.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_fmt_conv.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_fmt_conv.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_function_selector_generic.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_globals.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_globals.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_half_pel.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_half_pel.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_intra_modes_eval.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_intra_modes_eval.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_list.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_master.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_mc.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_mc.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_me.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_me.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_modify_frm_rate.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_modify_frm_rate.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_process.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_process.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_rate_control.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_rate_control.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_rc_mem_interface.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_rc_mem_interface.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_statistics.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_structs.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_time_stamp.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_time_stamp.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_trace.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_trace_support.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_utils.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_utils.h0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_version.c0
-rw-r--r--[-rwxr-xr-x]encoder/ih264e_version.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime.c0
-rw-r--r--[-rwxr-xr-x]encoder/ime.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_defs.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_distortion_metrics.c0
-rw-r--r--[-rwxr-xr-x]encoder/ime_distortion_metrics.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_statistics.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_structs.h0
-rw-r--r--[-rwxr-xr-x]encoder/ime_typedefs.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_bit_allocation.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_bit_allocation.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_cbr_buffer_control.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_cbr_buffer_control.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_cntrl_param.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_common.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_datatypes.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_est_sad.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_est_sad.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_fixed_point_error_bits.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_fixed_point_error_bits.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_frame_info_collector.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_frame_info_collector.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_mb_model_based.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_mb_model_based.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_mem_req_and_acq.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_picture_type.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_picture_type.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rate_control_api.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rate_control_api.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rate_control_api_structs.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rd_model.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rd_model.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_rd_model_struct.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_trace_support.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_vbr_storage_vbv.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_vbr_storage_vbv.h0
-rw-r--r--[-rwxr-xr-x]encoder/irc_vbr_str_prms.c0
-rw-r--r--[-rwxr-xr-x]encoder/irc_vbr_str_prms.h0
-rw-r--r--[-rwxr-xr-x]encoder/ithread.h0
-rw-r--r--[-rwxr-xr-x]encoder/iv2.h0
-rw-r--r--[-rwxr-xr-x]encoder/ive2.h0
-rw-r--r--[-rwxr-xr-x]encoder/mips/ih264e_function_selector.c0
-rw-r--r--[-rwxr-xr-x]encoder/mips/ih264e_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/mips/ime_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_function_selector.c0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_function_selector_sse42.c0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_function_selector_ssse3.c0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_half_pel_ssse3.c0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_intra_modes_eval_ssse3.c2
-rw-r--r--[-rwxr-xr-x]encoder/x86/ih264e_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]encoder/x86/ime_distortion_metrics_sse42.c3
-rw-r--r--[-rwxr-xr-x]encoder/x86/ime_platform_macros.h0
-rw-r--r--[-rwxr-xr-x]test/Android.mk0
-rw-r--r--[-rwxr-xr-x]test/decoder.mk0
-rw-r--r--[-rwxr-xr-x]test/decoder/main.c0
-rw-r--r--[-rwxr-xr-x]test/encoder.mk0
-rw-r--r--[-rwxr-xr-x]test/encoder/app.h0
-rw-r--r--[-rwxr-xr-x]test/encoder/input.c0
-rw-r--r--[-rwxr-xr-x]test/encoder/main.c20
-rw-r--r--[-rwxr-xr-x]test/encoder/output.c0
-rw-r--r--[-rwxr-xr-x]test/encoder/psnr.c0
-rw-r--r--[-rwxr-xr-x]test/encoder/psnr.h0
-rw-r--r--[-rwxr-xr-x]test/encoder/recon.c0
339 files changed, 1051 insertions, 1625 deletions
diff --git a/Android.mk b/Android.mk
index 0085832..0085832 100755..100644
--- a/Android.mk
+++ b/Android.mk
diff --git a/common/arm/ih264_arm_memory_barrier.s b/common/arm/ih264_arm_memory_barrier.s
index 523218f..3816409 100755..100644
--- a/common/arm/ih264_arm_memory_barrier.s
+++ b/common/arm/ih264_arm_memory_barrier.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@*******************************************************************************
@* @file
@* ih264_arm_memory_barrier.s
@@ -39,7 +39,6 @@
.text
.p2align 2
-
@*****************************************************************************
@*
@* Function Name : ih264_arm_dsb
diff --git a/common/arm/ih264_deblk_chroma_a9.s b/common/arm/ih264_deblk_chroma_a9.s
index 66102a7..8c9960a 100755..100644
--- a/common/arm/ih264_deblk_chroma_a9.s
+++ b/common/arm/ih264_deblk_chroma_a9.s
@@ -54,7 +54,7 @@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -84,7 +84,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bs4_bp_a9
@@ -130,7 +130,7 @@ ih264_deblk_chroma_horz_bs4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -160,7 +160,7 @@ ih264_deblk_chroma_horz_bs4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_bp_a9
@@ -224,7 +224,7 @@ ih264_deblk_chroma_vert_bs4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -260,7 +260,7 @@ ih264_deblk_chroma_vert_bs4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bslt4_bp_a9
@@ -326,7 +326,7 @@ ih264_deblk_chroma_horz_bslt4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -362,7 +362,7 @@ ih264_deblk_chroma_horz_bslt4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_bp_a9
@@ -465,7 +465,7 @@ ih264_deblk_chroma_vert_bslt4_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -495,7 +495,7 @@ ih264_deblk_chroma_vert_bslt4_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_mbaff_bp_a9
@@ -543,7 +543,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -579,7 +579,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9
@@ -656,7 +656,7 @@ ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -692,7 +692,7 @@ ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bs4_a9
@@ -743,7 +743,7 @@ ih264_deblk_chroma_horz_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -779,7 +779,7 @@ ih264_deblk_chroma_horz_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_a9
@@ -848,7 +848,7 @@ ih264_deblk_chroma_vert_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -893,7 +893,7 @@ ih264_deblk_chroma_vert_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_horz_bslt4_a9
@@ -968,7 +968,7 @@ ih264_deblk_chroma_horz_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1013,7 +1013,7 @@ ih264_deblk_chroma_horz_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_a9
@@ -1119,7 +1119,7 @@ ih264_deblk_chroma_vert_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1155,7 +1155,7 @@ ih264_deblk_chroma_vert_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bs4_mbaff_a9
@@ -1206,7 +1206,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -1251,7 +1251,7 @@ ih264_deblk_chroma_vert_bs4_mbaff_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_chroma_vert_bslt4_mbaff_a9
diff --git a/common/arm/ih264_deblk_luma_a9.s b/common/arm/ih264_deblk_luma_a9.s
index 3e6a4d9..9217ed2 100755..100644
--- a/common/arm/ih264_deblk_luma_a9.s
+++ b/common/arm/ih264_deblk_luma_a9.s
@@ -47,7 +47,7 @@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -83,7 +83,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_horz_bslt4_a9
@@ -187,7 +187,7 @@ ih264_deblk_luma_horz_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -217,7 +217,7 @@ ih264_deblk_luma_horz_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_horz_bs4_a9
@@ -353,7 +353,7 @@ ih264_deblk_luma_horz_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -389,7 +389,7 @@ ih264_deblk_luma_horz_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bslt4_a9
@@ -574,7 +574,7 @@ ih264_deblk_luma_vert_bslt4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -604,7 +604,7 @@ ih264_deblk_luma_vert_bslt4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bs4_a9
@@ -800,7 +800,7 @@ ih264_deblk_luma_vert_bs4_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -830,7 +830,7 @@ ih264_deblk_luma_vert_bs4_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bs4_mbaff_a9
@@ -942,7 +942,7 @@ ih264_deblk_luma_vert_bs4_mbaff_a9:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -978,7 +978,7 @@ ih264_deblk_luma_vert_bs4_mbaff_a9:
@* None
@*
@*******************************************************************************
-@*/
+@*
.global ih264_deblk_luma_vert_bslt4_mbaff_a9
diff --git a/common/arm/ih264_default_weighted_pred_a9q.s b/common/arm/ih264_default_weighted_pred_a9q.s
index 94cda46..a4688f2 100755..100644
--- a/common/arm/ih264_default_weighted_pred_a9q.s
+++ b/common/arm/ih264_default_weighted_pred_a9q.s
@@ -17,14 +17,13 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_default_weighted_pred_a9q.s
@*
@* @brief
@* Contains function definitions for default weighted prediction.
-@* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
@*
@* @author
@* Kaushik Senthoor R
@@ -38,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_default_weighted_pred_luma_a9q()
@@ -82,7 +81,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_default_weighted_pred_luma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
@@ -256,7 +255,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_default_weighted_pred_chroma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_ihadamard_scaling_a9.s b/common/arm/ih264_ihadamard_scaling_a9.s
index 687099a..c7feddd 100755..100644
--- a/common/arm/ih264_ihadamard_scaling_a9.s
+++ b/common/arm/ih264_ihadamard_scaling_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_ihadamard_scaling_a9.s
@@ -37,7 +37,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
@ * of a 16x16 intra prediction macroblock, and then performs scaling.
@ * prediction buffer
@@ -69,10 +69,10 @@
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
@ WORD16* pi2_out,
@ const UWORD16 *pu2_iscal_mat,
@@ -161,7 +161,7 @@ ih264_ihadamard_scaling_4x4_a9:
@ *******************************************************************************
-@ */
+@ *
@ * @brief This function performs a 2x2 inverse hadamard transform for chroma block
@ *
@ * @par Description:
@@ -189,10 +189,10 @@ ih264_ihadamard_scaling_4x4_a9:
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
@ WORD16* pi2_out,
@ const UWORD16 *pu2_iscal_mat,
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
index afd2860..6681a7c 100755..100644
--- a/common/arm/ih264_inter_pred_chroma_a9q.s
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_chroma_a9q.s
@@ -36,16 +36,16 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -85,7 +85,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_chroma(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -112,8 +112,6 @@
ih264_inter_pred_chroma_a9q:
-
-
stmfd sp!, {r4-r12, r14} @store register values to stack
vstmdb sp!, {d8-d15} @push neon registers to stack
ldr r4, [sp, #104]
diff --git a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
index ea6bba0..62b4b94 100755..100644
--- a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_a9q.s
@@ -36,13 +36,13 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -76,7 +76,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_horz (
@ UWORD8 *pu1_src,
@@ -102,6 +102,9 @@
ih264_inter_pred_luma_horz_a9q:
+
+
+
stmfd sp!, {r4-r12, r14} @store register values to stack
vstmdb sp!, {d8-d15} @push neon registers to stack
ldr r5, [sp, #104] @Loads ht
@@ -116,7 +119,7 @@ ih264_inter_pred_luma_horz_a9q:
beq loop_4
loop_16: @when wd=16
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 ;for checking loop
vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
@@ -173,7 +176,7 @@ loop_16: @when wd=16
b loop_16 @ loop if height == 8 or 16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.8 {d5, d6}, [r0], r2 @// Load row1
vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
vld1.8 {d2, d3}, [r0], r2 @// Load row0
@@ -204,7 +207,7 @@ loop_8:
beq end_func @ Branch if height==4
- b loop_8 @looping if height =8 or 16
+ b loop_8 @looping if height =8 or 16
loop_4:
vld1.8 {d5, d6}, [r0], r2 @// Load row1
diff --git a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
index 5b29e02..65c40a6 100755..100644
--- a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_vert_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -195,10 +195,10 @@ loop_16: @when wd=16
subne r0, r0, r2
beq end_func @ Branch if height==4
- b loop_16 @ looping if height = 8 or 16
+ b loop_16 @ looping if height = 8 or 16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0, [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1, [r0], r2 @ Vector load from src[1_0]
@@ -248,7 +248,7 @@ loop_8:
loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0]
diff --git a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
index 6a3c83d..8f049f8 100755..100644
--- a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_bilinear_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@ *******************************************************************************
@ * function:ih264_inter_pred_luma_bilinear
@ *
@@ -89,7 +89,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@@ -192,7 +192,7 @@ loop_16: @when wd=16
subs r12, r6, #8
vst1.8 {q15}, [r2], r5 @//Store dest row7
- beq end_func @ end function if ht=8
+ beq end_func @ end function if ht=8
vld1.8 {q0}, [r0], r3 @// Load row8 ;src1
vaddl.u8 q10, d0, d4
@@ -275,7 +275,7 @@ loop_8: @wd=8;
vqrshrun.s16 d31, q13, #1
subs r12, r6, #4
vst1.8 {d31}, [r2], r5 @//Store dest row3
- beq end_func @ end function if ht=4
+ beq end_func @ end function if ht=4
vld1.8 {d12}, [r1], r4 @// Load row4 ;src2
vld1.8 {d8}, [r0], r3 @// Load row4 ;src1
@@ -298,7 +298,7 @@ loop_8: @wd=8;
vqrshrun.s16 d31, q11, #1
subs r12, r6, #8
vst1.8 {d31}, [r2], r5 @//Store dest row7
- beq end_func @ end function if ht=8
+ beq end_func @ end function if ht=8
vld1.8 {d0}, [r0], r3 @// Load row8 ;src1
vld1.8 {d4}, [r1], r4 @// Load row8 ;src2
@@ -367,7 +367,7 @@ loop_4:
vqrshrun.s16 d31, q13, #1
subs r12, r6, #4
vst1.32 d31[0], [r2], r5 @//Store dest row3
- beq end_func @ end function if ht=4
+ beq end_func @ end function if ht=4
vld1.32 d12[0], [r1], r4 @// Load row4 ;src2
vld1.32 d8[0], [r0], r3 @// Load row4 ;src1
diff --git a/common/arm/ih264_inter_pred_luma_copy_a9q.s b/common/arm/ih264_inter_pred_luma_copy_a9q.s
index 8ba2fbf..c0b0568 100755..100644
--- a/common/arm/ih264_inter_pred_luma_copy_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_copy_a9q.s
@@ -17,8 +17,8 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -53,7 +53,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_copy (
@ UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -182,7 +182,7 @@ end_inner_loop_wd_16:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
-@ /*
+@ *
@ ********************************************************************************
@ *
@ * @brief This function copies a 4x4 block to destination
@@ -208,7 +208,7 @@ end_inner_loop_wd_16:
@ * Currently wd and height is not used, ie a 4x4 block is always copied
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_interleave_copy(WORD16 *pi2_src,
@ UWORD8 *pu1_out,
@ WORD32 pred_strd,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
index 43321a8..54183f0 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -88,7 +88,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
index 65a6de7..c8edf38 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -91,7 +91,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@@ -835,7 +835,7 @@ loop_8:
vmov q7, q14
vst1.32 d30, [r1], r3 @ store row 3
- bgt loop_8 @if height =8 or 16 loop
+ bgt loop_8 @if height =8 or 16 loop
b end_func
loop_4_start:
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
index c39ae01..ab1d1d1 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -30,19 +30,19 @@
@*
@* @par List of Functions:
@*
-@* - ih264_inter_pred_luma_horz_qpe_a9ql()
+@* - ih264_inter_pred_luma_horz_qpel_a9q()
@*
@* @remarks
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -79,7 +79,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_horz (
@ UWORD8 *pu1_src,
@@ -126,7 +126,7 @@ ih264_inter_pred_luma_horz_qpel_a9q:
beq loop_4
loop_16: @when wd=16
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0
vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
@@ -187,7 +187,7 @@ loop_16: @when wd=16
b loop_16
loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.8 {d5, d6}, [r0], r2 @// Load row1
vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
@@ -221,7 +221,7 @@ loop_8:
subs r5, r5, #2 @ 2 rows done, decrement by 2
beq end_func @ Branch if height==4
- b loop_8 @looping if height == 8 or 16
+ b loop_8 @looping if height == 8 or 16
loop_4:
vld1.8 {d5, d6}, [r0], r2 @// Load row1
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
index 565cc80..3c63ca3 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@*******************************************************************************
@*
@* @brief
@@ -91,7 +91,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
index 3c8b60a..cfe03a0 100755..100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -36,14 +36,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
-@/**
@*******************************************************************************
@*
@* @brief
@@ -90,7 +87,7 @@
@* None
@*
@*******************************************************************************
-@*/;
+@*;
@void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
index d45055e..e2c68ef 100755..100644
--- a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -36,13 +36,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
@
-@/**
-@/**
@*******************************************************************************
@*
@* @brief
@@ -79,7 +77,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_inter_pred_luma_vert (
@ UWORD8 *pu1_src,
@@ -211,12 +209,12 @@ loop_16: @when wd=16
subne r0, r0, r2
beq end_func @ Branch if height==4
- b loop_16 @ looping if height = 8 or 16
+ b loop_16 @ looping if height = 8 or 16
loop_8:
- @// Processing row0 and row1
+ @ Processing row0 and row1
vld1.u32 d0, [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1, [r0], r2 @ Vector load from src[1_0]
vld1.u32 d2, [r0], r2 @ Vector load from src[2_0]
@@ -270,7 +268,7 @@ loop_8:
b loop_8 @looping if height == 8 or 16
loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0]
vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0]
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
index d03fc55..ccd5c0d 100755..100644
--- a/common/arm/ih264_intra_pred_chroma_a9q.s
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_chroma_a9q.s
@@ -39,15 +39,11 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+@* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
@
-@/**
-@/**
-@/**
-@
.text
.p2align 2
@@ -60,7 +56,7 @@ scratch_chroma_intrapred_addr1:
scratch_intrapred_chroma_plane_addr1:
.long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_dc
@@ -91,7 +87,7 @@ scratch_intrapred_chroma_plane_addr1:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -105,8 +101,6 @@ scratch_intrapred_chroma_plane_addr1:
@ r3 => dst_strd
@ r4 => ui_neighboravailability
-
-
.global ih264_intra_pred_chroma_8x8_mode_dc_a9q
ih264_intra_pred_chroma_8x8_mode_dc_a9q:
@@ -191,10 +185,10 @@ str_pred:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_horz
@@ -226,7 +220,7 @@ str_pred:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -270,7 +264,7 @@ loop_8x8_horz:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_vert
@@ -339,10 +333,10 @@ ih264_intra_pred_chroma_8x8_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_plane
@@ -373,7 +367,7 @@ ih264_intra_pred_chroma_8x8_mode_vert_a9q:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -393,7 +387,6 @@ ih264_intra_pred_chroma_8x8_mode_plane_a9q:
stmfd sp!, {r4-r10, r12, lr}
vpush {d8-d15}
-
vld1.32 d0, [r0]
add r10, r0, #10
vld1.32 d1, [r10]
@@ -542,7 +535,6 @@ scrlblc2:
end_func_plane:
-
vpop {d8-d15}
ldmfd sp!, {r4-r10, r12, pc}
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
index e38e203..0dd82f3 100755..100644
--- a/common/arm/ih264_intra_pred_luma_16x16_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_16x16_a9q.s
@@ -39,14 +39,14 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
-@/**
-@/**
-@/**
+@**
+@**
+@**
@
.text
@@ -57,10 +57,10 @@
.hidden ih264_gai1_intrapred_luma_plane_coeffs
scratch_intrapred_addr1:
.long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_vert_a9q
+@*ih264_intra_pred_luma_16x16_mode_vert
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:vertical
@@ -135,13 +135,13 @@ ih264_intra_pred_luma_16x16_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_horz_a9q
+@*ih264_intra_pred_luma_16x16_mode_horz
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:horizontal
@@ -170,7 +170,7 @@ ih264_intra_pred_luma_16x16_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -213,13 +213,13 @@ loop_16x16_horz:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_dc_a9q
+@*ih264_intra_pred_luma_16x16_mode_dc
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:DC
@@ -247,7 +247,7 @@ loop_16x16_horz:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -300,7 +300,7 @@ top_available: @ONLY TOP AVAILABLE
vdup.u8 q0, d0[0]
b str_pred
-left_available: @ONLY LEFT AVAILABLE
+left_available: @ONLY LEFT AVAILABLE
vld1.u8 {q0}, [r0]
vpaddl.u8 q0, q0
vadd.u16 d0, d0, d1
@@ -337,13 +337,13 @@ str_pred:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
-@*ih264_intra_pred_luma_16x16_mode_plane_a9q
+@*ih264_intra_pred_luma_16x16_mode_plane
@*
@* @brief
@* Perform Intra prediction for luma_16x16 mode:PLANE
@@ -371,7 +371,7 @@ str_pred:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_intra_pred_luma_4x4_a9q.s b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
index cb386ea..5cc7e23 100755..100644
--- a/common/arm/ih264_intra_pred_luma_4x4_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_4x4_a9q.s
@@ -44,21 +44,16 @@
@* None
@*
@*******************************************************************************
-@*/
-
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
-@
+@*
-@/**
-@/**
-@/**
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
.text
.p2align 2
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert
@@ -128,10 +123,10 @@ ih264_intra_pred_luma_4x4_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz
@@ -163,7 +158,7 @@ ih264_intra_pred_luma_4x4_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -210,10 +205,10 @@ ih264_intra_pred_luma_4x4_mode_horz_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_dc
@@ -244,7 +239,7 @@ ih264_intra_pred_luma_4x4_mode_horz_a9q:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -352,7 +347,7 @@ end_func:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_diag_dl
@@ -383,7 +378,7 @@ end_func:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -434,7 +429,7 @@ end_func_diag_dl:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_diag_dr
@@ -465,7 +460,7 @@ end_func_diag_dl:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -514,7 +509,7 @@ end_func_diag_dr:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert_r
@@ -545,7 +540,7 @@ end_func_diag_dr:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -596,7 +591,7 @@ end_func_vert_r:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz_d
@@ -627,7 +622,7 @@ end_func_vert_r:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -659,7 +654,7 @@ ih264_intra_pred_luma_4x4_mode_horz_d_a9q:
vqrshrun.s16 d5, q12, #2
sub r5, r3, #2
vmov.8 d6, d5
- vtrn.8 d4, d5 @
+ vtrn.8 d4, d5 @
vst1.u16 {d5[1]}, [r1]!
vst1.16 {d6[2]}, [r1], r5
vst1.u16 {d4[1]}, [r1]!
@@ -678,7 +673,7 @@ end_func_horz_d:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_vert_l
@@ -709,7 +704,7 @@ end_func_horz_d:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -759,7 +754,7 @@ end_func_vert_l:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_4x4_mode_horz_u
@@ -790,7 +785,7 @@ end_func_vert_l:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -825,9 +820,9 @@ ih264_intra_pred_luma_4x4_mode_horz_u_a9q:
vext.8 d6, d5, d4, #1
vst1.8 {d4[2]}, [r1]!
vst1.8 {d6[0]}, [r1]!
- vtrn.8 d6, d5 @
+ vtrn.8 d6, d5 @
sub r5, r3, #2
- vtrn.8 d4, d6 @
+ vtrn.8 d4, d6 @
vdup.8 d7, r9
vst1.16 {d6[0]}, [r1], r5
vst1.16 {d6[0]}, [r1]!
diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
index 6da1c95..352d29d 100755..100644
--- a/common/arm/ih264_intra_pred_luma_8x8_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_intra_pred_luma_8x8_a9q.s
@@ -45,17 +45,11 @@
@* None
@*
@*******************************************************************************
-@*/
-
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
-@
+@*
-@/**
-@/**
-@/**
+@* All the functions here are replicated from ih264_intra_pred_filters.c
@
-
.text
.p2align 2
@@ -64,7 +58,7 @@
scratch_intrapred_addr_8x8:
.long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_ref_filtering
@@ -95,7 +89,7 @@ scratch_intrapred_addr_8x8:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst)
@@ -111,7 +105,6 @@ ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
stmfd sp!, {r4-r12, r14} @store register values to stack
vpush {d8-d15}
-
vld1.u8 {q0}, [r0]! @
vld1.u8 {q1}, [r0]
add r0, r0, #8 @
@@ -141,6 +134,7 @@ ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
end_func_ref_filt:
+
vpop {d8-d15}
ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
@@ -149,7 +143,7 @@ end_func_ref_filt:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert
@@ -219,10 +213,10 @@ ih264_intra_pred_luma_8x8_mode_vert_a9q:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz
@@ -254,7 +248,7 @@ ih264_intra_pred_luma_8x8_mode_vert_a9q:
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -299,10 +293,10 @@ loop_8x8_horz:
-@/******************************************************************************
+@******************************************************************************
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_dc
@@ -333,7 +327,7 @@ loop_8x8_horz:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -413,7 +407,7 @@ str_pred:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dl
@@ -444,7 +438,7 @@ str_pred:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -506,7 +500,7 @@ end_func_diag_dl:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dr
@@ -537,7 +531,7 @@ end_func_diag_dl:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -597,7 +591,7 @@ end_func_diag_dr:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_r
@@ -628,7 +622,7 @@ end_func_diag_dr:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -717,7 +711,7 @@ end_func_vert_r:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_d
@@ -748,7 +742,7 @@ end_func_vert_r:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -791,7 +785,7 @@ ih264_intra_pred_luma_8x8_mode_horz_d_a9q:
vmov.8 q4, q2
vmov.8 q5, q3
sub r6, r3, #6
- vtrn.8 q4, q5 @
+ vtrn.8 q4, q5 @
vmov.8 q6, q4
vmov.8 q7, q5
sub r5, r3, #4
@@ -835,7 +829,7 @@ end_func_horz_d:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_l
@@ -866,7 +860,7 @@ end_func_horz_d:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -887,6 +881,7 @@ ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
stmfd sp!, {r4-r12, r14} @Restoring registers from stack
vpush {d8-d15}
+
add r0, r0, #9
vld1.u8 {q0}, [r0]
add r0, r0, #1
@@ -935,7 +930,7 @@ end_func_vert_l:
-@/**
+@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_u
@@ -966,7 +961,7 @@ end_func_vert_l:
@* @remarks
@* None
@*
-@*******************************************************************************/
+@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s
index f71ca69..4e49f6a 100755..100644
--- a/common/arm/ih264_iquant_itrans_recon_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_iquant_itrans_recon_a9.s
@@ -38,8 +38,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -82,7 +82,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -225,7 +225,7 @@ ih264_iquant_itrans_recon_4x4_a9:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
- @/**
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -268,7 +268,7 @@ ih264_iquant_itrans_recon_4x4_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -416,7 +416,7 @@ ih264_iquant_itrans_recon_chroma_4x4_a9:
ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
-@/*
+@*
@ *******************************************************************************
@ *
@ * @brief
@@ -459,7 +459,7 @@ ih264_iquant_itrans_recon_chroma_4x4_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
index 8d71bdb..97c4724 100755..100644
--- a/common/arm/ih264_iquant_itrans_recon_dc_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_iquant_itrans_recon_dc_a9.s
@@ -37,8 +37,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -83,7 +83,7 @@
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -167,7 +167,7 @@ ih264_iquant_itrans_recon_4x4_dc_a9:
-@/*
+@*
@ *******************************************************************************
@ *
@ * @brief
@@ -212,7 +212,7 @@ ih264_iquant_itrans_recon_4x4_dc_a9:
@ * None
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -300,7 +300,7 @@ ih264_iquant_itrans_recon_8x8_dc_a9:
ldmfd sp!, {r4-r8, r15}
-@ /*
+@ *
@ ********************************************************************************
@ *
@ * @brief This function reconstructs a 4x4 sub block from quantized resiude and
@@ -328,7 +328,7 @@ ih264_iquant_itrans_recon_8x8_dc_a9:
@ * @remarks none
@ *
@ *******************************************************************************
-@ */
+@ *
@ void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
@ UWORD8 *pu1_out,
@@ -368,6 +368,7 @@ ih264_iquant_itrans_recon_chroma_4x4_dc_a9:
vmov.u16 q15, #0x00ff
+
vld1.u8 d18, [r2], r0 @load out [8 bit size) -8 coeffs
vaddw.u8 q1, q0, d2 @Add pred
vld1.u8 d19, [r2], r0
diff --git a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s
index 1d74da5..769d5d7 100755..100644
--- a/common/arm/ih264_itrans_recon_a9.s
+++ b/common/arm/ih264_itrans_recon_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_itrans_recon_neon_a9.s
@@ -33,8 +33,8 @@
@ * None
@ *
@ *******************************************************************************
-@*/
-@/**
+@*
+@**
@ *******************************************************************************
@ *
@ * @brief
@@ -72,7 +72,7 @@
@ *
@ *
@ *******************************************************************************
-@ */
+@ *
@void ih264_itrans_recon_4x4(
@ WORD16 *pi2_src,
@ UWORD8 *pu1_pred,
diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s
index 2808897..39ad9b3 100755..100644
--- a/common/arm/ih264_mem_fns_neon.s
+++ b/common/arm/ih264_mem_fns_neon.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@ *******************************************************************************
@ * @file
@ * ih264_mem_fns_neon.s
@@ -40,9 +40,9 @@
@ * None
@ *
@ *******************************************************************************
-@*/
+@*
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -65,7 +65,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
@ UWORD8 num_bytes)
@@ -94,7 +94,7 @@ loop_neon_memcpy_mul_8:
@*******************************************************************************
-@*/
+@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@ UWORD8 *pu1_src,
@ UWORD8 num_bytes)
@@ -143,6 +143,8 @@ loop_memcpy:
+
+
.global ih264_memset_mul_8_a9q
ih264_memset_mul_8_a9q:
@@ -208,6 +210,8 @@ loop_memset:
+
+
.global ih264_memset_16bit_mul_8_a9q
ih264_memset_16bit_mul_8_a9q:
diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s
index 9bab268..e7a1f91 100755..100644
--- a/common/arm/ih264_padding_neon.s
+++ b/common/arm/ih264_padding_neon.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@*
@ *******************************************************************************
@ * @file
@ * ih264_padding_neon.s
@@ -39,10 +39,10 @@
@ * None
@ *
@ *******************************************************************************
-@*/
+@*
-@/**
+@**
@*******************************************************************************
@*
@* @brief pad at the top of a 2d array
@@ -67,7 +67,7 @@
@* @remarks none
@*
@*******************************************************************************
-@*/
+@*
@void ih264_pad_top(UWORD8 *pu1_src,
@ WORD32 src_strd,
@ WORD32 wd,
@@ -110,7 +110,7 @@ loop_neon_pad_top:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -147,7 +147,7 @@ loop_neon_pad_top:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_LEFT_LUMA == C
@void ih264_pad_left_luma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -160,6 +160,7 @@ loop_neon_pad_top:
@ r3 => pad_size
+
.global ih264_pad_left_luma_a9q
ih264_pad_left_luma_a9q:
@@ -245,7 +246,7 @@ end_func:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -282,7 +283,7 @@ end_func:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_LEFT_CHROMA == C
@void ih264_pad_left_chroma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -373,7 +374,7 @@ end_func_l_c:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -410,7 +411,7 @@ end_func_l_c:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_RIGHT_LUMA == C
@void ih264_pad_right_luma(UWORD8 *pu1_src,
@ WORD32 src_strd,
@@ -519,7 +520,7 @@ end_func_r:
-@/**
+@**
@*******************************************************************************
@*
@* @brief
@@ -556,7 +557,7 @@ end_func_r:
@* None
@*
@*******************************************************************************
-@*/
+@*
@#if PAD_RIGHT_CHROMA == C
@void ih264_pad_right_chroma(UWORD8 *pu1_src,
@ WORD32 src_strd,
diff --git a/common/arm/ih264_platform_macros.h b/common/arm/ih264_platform_macros.h
index 1f67403..1f67403 100755..100644
--- a/common/arm/ih264_platform_macros.h
+++ b/common/arm/ih264_platform_macros.h
diff --git a/common/arm/ih264_resi_trans_a9.s b/common/arm/ih264_resi_trans_a9.s
deleted file mode 100755
index 08821f5..0000000
--- a/common/arm/ih264_resi_trans_a9.s
+++ /dev/null
@@ -1,604 +0,0 @@
-@/******************************************************************************
-@ *
-@ * Copyright (C) 2015 The Android Open Source Project
-@ *
-@ * Licensed under the Apache License, Version 2.0 (the "License");
-@ * you may not use this file except in compliance with the License.
-@ * You may obtain a copy of the License at:
-@ *
-@ * http://www.apache.org/licenses/LICENSE-2.0
-@ *
-@ * Unless required by applicable law or agreed to in writing, software
-@ * distributed under the License is distributed on an "AS IS" BASIS,
-@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ * See the License for the specific language governing permissions and
-@ * limitations under the License.
-@ *
-@ *****************************************************************************
-@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-@*/
-@/**
-@*******************************************************************************
-@* @file
-@* ih264_resi_trans_a9.s
-@*
-@* @brief
-@* Contains function definitions for residual and forward trans
-@*
-@* @author
-@* Ittiam
-@*
-@* @par List of Functions:
-@* ih264_resi_trans_4x4_a9
-@* ih264_resi_trans_8x8_a9
-@* @remarks
-@* None
-@*
-@*******************************************************************************
-
-
-.text
-.p2align 2
-@*****************************************************************************
-@*
-@* Function Name : ih264_resi_trans_4x4_a9
-@* Description : This function does cf4 of H264 followed by and approximate scaling
-@*
-@* Arguments :
-@ R0 :pointer to src buffer
-@ R1 :pointer to pred buffer
-@ R2 :pointer to dst buffer
-@ R3 :src_stride
-@ STACk :pred_stride,dst_stride
-
-@* Values Returned : NONE
-@*
-@* Register Usage :
-@* Stack Usage :
-@* Cycles : Around
-@* Interruptiaility : Interruptable
-@*
-@* Known Limitations
-@* \Assumptions :
-@*
-@* Revision History :
-@* DD MM YYYY Author(s) Changes
-@* 30 12 2009 100633 First version
-@*
-@*****************************************************************************
-
-
- .global ih264_resi_trans_4x4_a9
- .extern g_scal_coff_h264_4x4
-g_scal_coff_h264_4x4_addr:
- .long g_scal_coff_h264_4x4 - 4x4lbl - 8
-
-ih264_resi_trans_4x4_a9:
-
- @R0 :pointer to src buffer
- @R1 :pointer to pred buffer
- @R2 :pointer to dst buffer
- @R3 :src_stride
- @STACk :pred_stride,dst_stride
-
- push {r4-r12, lr} @push all the variables first
-
- mov r6, sp
- add r6, r6, #40 @decrement stack pointer,to accomodate two variables
- ldmfd r6, {r4-r5} @load the strides into registers
- @R4 pred_stride
- @R5 dst_stride
-
-
- @we have to give the stride as post inrement in VLDR1
- @but since thr stride is from end of row 1 to start of row 2,
- @we need to add the size of the curent row to strides ie we need to add 4 to it (4 bytes)
- @ADD R3,#4
- @ADD R4,#4
- @ADD R5,#4
- @in case of dst the stride represnts 16 bit ie 2*8bits
- @hence we need to add #4 to it and thenm multiply by 2
- @--------------------function loading done------------------------
-
- @lets find residual
- @data is like 1a -> d0[1:31] d0[32:64]
- @ a b c d # # # #
- vld1.u8 d30, [r0], r3 @load 4 pixels of row1 current buffer
- vld1.u8 d31, [r1], r4 @load 4 pixels of row1 pred buffer
- @ data is like 1a -> q4[1:63] q4[64:148]
- @ d8[1:63] d9[1:63]
- @ a b c d # # # #
-
- vld1.u8 d28, [r0], r3 @load row 2 of src to d28[0]
- vld1.u8 d29, [r1], r4 @load row2 of pred to d29[0]
-
- vld1.u8 d26, [r0], r3 @load row 3 of src to d26[0]
- vsubl.u8 q0, d30, d31 @curr - pred for row one
-
- vld1.u8 d27, [r1], r4 @load row 3of pred t0 d27[0]
- vsubl.u8 q1, d28, d29 @find row 2 of src -pred to d0
-
- vld1.u8 d24, [r0], r3 @load row 4 of src to d24[0]
-
- vld1.u8 d25, [r1], r4 @load row 4 of src tp d25[0]
- vsubl.u8 q2, d26, d27 @load src-pred row 3 to d[2]
-
- lsl r5, r5, #2 @ multiply dst stride by since we are storing 32 bit values
- ldr r6, g_scal_coff_h264_4x4_addr
-4x4lbl:
- add r6, r6, pc @ load the address of global array
-
- vsubl.u8 q3, d24, d25 @load row 4 of src - pred to q6
-
- @after this
- @D0 -> 1a
- @D2 -> 2a
- @D4 -> 3a
- @D6 -> 4a
-
- @transpose the matrix so that we can do the horizontal transform first
- @#1 #2 #3 #4
- @a b c d ---- D0
- @e f g h -----D2
- @i j k l -----D4
- @m n o p -----D6
- @transpose the inner 2x2 blocks
- vtrn.16 d0, d2
- vld1.s16 {q10}, [r6]! @ load the scaling values 0-7;
- vtrn.16 d4, d6
- @a e c g
- @b f d h
- @i m k o
- @j n l p
- vtrn.32 d0, d4
- vtrn.32 d2, d6
- @a e i m #1 -- D0 --- x4
- @b f j n #2 -- D2 --- x5
- @c g k o #3 -- D4 ----x6
- @d h l p #4 -- D6 ----x7
-
- @we have loaded the residuals into the registers , now we need to add and subtract them
- @let us do the horiz transform first
-
- vsub.s16 d5, d2, d4 @x2 = x5-x6
- vsub.s16 d7, d0, d6 @x3 = x4-x7;
-
- vadd.s16 d3, d2, d4 @x1 = x5+x6
- vadd.s16 d1, d0, d6 @x0 = x4+x7
-
-
- vshl.s16 d31, d7, #1 @
- vshl.s16 d30, d5, #1 @
-
- vadd.s16 d0, d1, d3 @x0 + x1;
- vsub.s16 d4, d1, d3 @x0 - x1;
-
- vadd.s16 d2, d31, d5 @U_SHIFT(x3,1,shft) + x2;
- vsub.s16 d6, d7, d30 @x3 - U_SHIFT(x2,1,shft);
-
- @taking transform again so as to make do vert transform
- vtrn.16 d0, d2
- vtrn.16 d4, d6
-
- vtrn.32 d0, d4
- vtrn.32 d2, d6
-
- @let us do vertical transform
- @same code as horiz
-
- vadd.s16 d1, d0, d6 @x0 = x4+x7
- vadd.s16 d3, d2, d4 @x1 = x5+x6
- vsub.s16 d7, d0, d6 @x3 = x4-x7;
- vsub.s16 d5, d2, d4 @x2 = x5-x6
-
-
-@Since we are going to do scal / quant or whatever, we are going to divide by
-@a 32 bit number. So we have to expand the values
-
- @VADDL.S16 Q12,D1,D3;x0 + x1
- @VSUBL.S16 Q14,D1,D3;x0 - x1
-
- @VSHL.S16 D8,D5,#1;
- @VSHL.S16 D9,D7,#1;
-
- @VADDL.S16 Q13,D9,D5 ; + x2
- @VSUBL.S16 Q15,D7,D8 ;x3 - U_SHIFT(x2,1,shft)
-
-@scaling follows
-
-@now we need to do the scaling,so load the scaling matrix
-@mutliplying by the scaling coeffient; store the results from q5-q8 ;
-
- vadd.s16 d24, d3, d1 @x4 = x0 + x1
- vsub.s16 d28, d1, d3 @x6 = x0 - x1
-
- vshl.s16 d0, d7, #1 @ U_SHIFT(x3,1,shft)
- vmull.s16 q4, d24, d20 @x4*s0
-
- vshl.s16 d2, d5, #1 @ U_SHIFT(x2,1,shft)
-
- vadd.s16 d26, d0, d5 @x5 = U_SHIFT(x3,1,shft) + x2
- vmull.s16 q5, d26, d21 @x5*s1
-
- vst1.s32 {q4}, [r2], r5 @save 4 pixels of row1 current buffer and increment pointer by stride
-
- vld1.s16 {q10}, [r6] @load 8-16 scaling coeffcients
-
- vsub.s16 d30, d7, d2 @x7 = x3 - U_SHIFT(x2,1,shft)
-
- vmull.s16 q6, d28, d20 @x6*s2
- vst1.s32 {q5}, [r2], r5
-
- vmull.s16 q7, d30, d21 @x7*s3
-
-
- vst1.s32 {q6}, [r2], r5
- vst1.s32 {q7}, [r2]
-
- pop {r4-r12, pc} @pop back all variables
-
-
-
-
-@*****************************************************************************
-@* Function Name : ih264_resi_trans_8x8_a9
-@* Description : This function does cf8 followd by an approximate normalization of H264
-@*
-@* Arguments :
-@* R0 :pointer to src buffer
-@ R1 :pointer to pred buffer
-@ R2 :pointer to dst buffer
-@ R3 :src_stride
-@ STACk :pred_stride,dst_st
-@*
-@*
-@* Values Returned : NONE
-@*
-@* Register Usage :
-@* Stack Usage :
-@* Cycles : Around
-@* Interruptiaility : Interruptable
-@*
-@* Known Limitations
-@* \Assumptions :
-@*
-@* Revision History :
-@* DD MM YYYY Author(s) Changes
-@* 30 12 2009 100633 First version
-@*
-@*****************************************************************************
-
-
- .global ih264_resi_trans_8x8_a9
- .extern g_scal_coff_h264_8x8
-g_scal_coff_h264_8x8_addr:
- .long g_scal_coff_h264_8x8 - 8x8lbl - 8
-
-
-ih264_resi_trans_8x8_a9:
-
- @R0 :pointer to src buffer
- @R1 :pointer to pred buffer
- @R2 :pointer to dst buffer
- @R3 :src_stride
- @STACk :pred_stride,dst_stride
-
- push {r4-r12, lr} @push all the variables first
-
- mov r6, sp
- add r6, r6, #40 @decrement stack pointer,to accomodate two variables
- ldmfd r6, {r4-r5} @load the strides into registers
- @R4 pred_stride
- @R5 dst_stride
-
- @we have to give the stride as post inrement in vst1
- @in case of dst the stride represnts 16 bit ie 2*8bits
- @hence we need to add #4 to it and thenm multiply by 2
- @--------------------function loading done------------------------
-
- @lets find residual
- @data is like 1a -> d0[1:31] d0[32:64]
- @ a b c d # # # #
- vld1.u8 d30, [r0], r3 @load 4 pixels of row1 current buffer
- vld1.u8 d31, [r1], r4 @load 4 pixels of row1 pred buffer
-
- vld1.u8 d28, [r0], r3 @src rw2
- vld1.u8 d29, [r1], r4 @pred rw2
- vsubl.u8 q0, d30, d31 @src-pred rw1
-
- vld1.u8 d26, [r0], r3
- vld1.u8 d27, [r1], r4
- vsubl.u8 q1, d28, d29
-
- vld1.u8 d24, [r0], r3
- vld1.u8 d25, [r1], r4
- vsubl.u8 q2, d26, d27
-
- vld1.u8 d22, [r0], r3
- vld1.u8 d23, [r1], r4
- vsubl.u8 q3, d24, d25
-
- vld1.u8 d20, [r0], r3
- vld1.u8 d21, [r1], r4
- vsubl.u8 q4, d22, d23
-
- vld1.u8 d18, [r0], r3
- vld1.u8 d19, [r1], r4
- vsubl.u8 q5, d20, d21
-
- vld1.u8 d16, [r0], r3
- vld1.u8 d17, [r1], r4
- vsubl.u8 q6, d18, d19
-
- lsl r5, r5, #2
-
-
- vsubl.u8 q7, d16, d17
-
- @after this
- @Q0 -> 1a
- @Q1 -> 2a
- @Q2 -> 3a
- @Q3 -> 4a
- @Q4 -> 5a
- @Q5 -> 6a
- @Q6 -> 7a
- @Q7 -> 8a
-
- @transpose the matrix so that we can do the horizontal transform first
-
- @transpose the inner 2x2 blocks
- vtrn.16 q0, q1
- vtrn.16 q2, q3
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- @transpose the inner 4x4 blocks
- vtrn.32 q0, q2
- vtrn.32 q1, q3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
-
- @transpose the outer 8x8 blocks
- vswp d1, d8
- vswp d7, d14
- vswp d3, d10
- vswp d5, d12
- @transpose done
-
-@@this point we will have data in Q0-Q7
-@Q7 will be populated within 2 clock cycle
-@all others are availabe @ this clock cycle
-
- @we have loaded the residuals into the registers , now we need to add and subtract them
- @let us do the horiz transform first
-
- vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
- vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
- vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
- vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
-
- vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
- vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
- vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
- vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
-
- vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
- vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
- vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
- vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
-
- ldr r6, g_scal_coff_h264_8x8_addr
-8x8lbl:
- add r6, r6, pc @ load the address of global array
-
- vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
- vshr.s16 q8, q7, #1 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
-
- vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
-
- vadd.s16 q2, q5, q8 @
-
-
- vshr.s16 q9, q5, #1 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
- vsub.s16 q6, q9, q7 @
-
-@do not change Q0,Q2.Q4,Q6 they contain results
-@Q1,Q3,Q5,Q7 TO STORE RESULTS
-@Q8 Q9 Q10 Q11 USE @WILL
-
- vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
- vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
- vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
- vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
-
- vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
- vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
- vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
- vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
- vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
-
-
- vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
- vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
- vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
- vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
-
- @------------horiz transform done-------------------------
- @results are in Q0-Q7
- @all other neon registes can be used at will
-
-@doing vertical transform
-@code exact copy of horiz transform above
-
- @transpose the inner 2x2 blocks
- vtrn.16 q0, q1
- vtrn.16 q2, q3
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- @transpose the inner 4x4 blocks
- vtrn.32 q0, q2
- vtrn.32 q1, q3
-
- vtrn.32 q4, q6
- vtrn.32 q5, q7
-
- @transpose the outer 8x8 blocks
- vswp d1, d8
- vswp d3, d10
- vswp d5, d12
- vswp d7, d14
-
- @transpose done
-
- vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
- vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
- vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
- vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
-
- vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
- vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
- vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
- vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
-
- vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
- vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
- vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
- vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
-
-
- vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
-
- vshr.s16 q8, q7, #1 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
- @DSHIFT_TO_0 Q8,Q7,#1,#0
- vadd.s16 q2, q5, q8 @
-
- vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
-
- vshr.s16 q9, q5, #1 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
- vsub.s16 q6, q9, q7 @
-
-@do not change Q0,Q2.Q4,Q6 they contain results
-@Q1,Q3,Q5,Q7 TO STORE RESULTS
-@Q8 Q9 Q10 Q11 USE @WILL
-
- vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
- vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
- vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
- vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
-
-
- vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
- vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
- vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
- vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
- vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
- vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
- vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
- vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
- vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
- vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
- vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
-
-
-@since we are going to scal by small values, we need not expand the guys to 32 bit bit values
- vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
- vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
- vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
- vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
-
- @------------vert transform done-------------------------
- @results are in Q0-Q7
- @all other neon registes can be used at will
-
- @scaling
- @since the 8x8 scaling matrix repeats in 1x4,1x4 block ,
- @we need only load 4 values for each row and in total 4 rows
- vld1.s16 {q14-q15}, [r6] @
-
- @since we need to get a 32 bit o/p for two 16 bit multiplications
- @we need a VMULL instruction
-@-----------------------------first and second row
-
- vmull.s16 q8, d0, d28 @scale the first row first 4 elem
- vmull.s16 q9, d28, d1 @scale the second row last 4 elemts
-
- vmull.s16 q10, d2, d29 @ scale second row first 4 elem
- vmull.s16 q11, d29, d3 @scale the second row last 4 elem
- vmull.s16 q12, d4, d30 @scale third row first 4 elem
-
- vst1.s32 {q8, q9}, [r2], r5 @ write the first row complete
-
- vmull.s16 q13, d30, d5 @scale the third row last 4 elem
- vmull.s16 q8, d6, d31 @scale the fourth row first 4 elem
-
-
- vst1.s32 {q10, q11}, [r2], r5 @store the second row complete
-
-@------------------------------- 3rd and 4th row
-
- vmull.s16 q9, d31, d7 @scale the fourth row second column
-
- vst1.s32 {q12, q13}, [r2], r5 @store the third row complete
-
- vmull.s16 q10, d8, d28 @scale the 5th row fisrst 4 elms
- vmull.s16 q11, d28, d9 @scale the 5th row second 4 elems
-
- vmull.s16 q12, d10, d29 @scale the 6th row first4 elements
-
-
- vst1.s32 {q8, q9}, [r2], r5 @store fifth row
-
-@--------------------------------5th and 6th row
-
- vmull.s16 q13, d29, d11 @scale 6th row sendond 4 elems
-
- vmull.s16 q8, d12, d30 @scale 7th rw first 4 elms
-
- vst1.s32 {q10, q11}, [r2], r5 @store 6th row second 4 elements
-
- vmull.s16 q9, d30, d13 @scale 7th rw second 4 elms
- vmull.s16 q10, d14, d31 @scale 8th rw forst 4 elms
-
-
- vst1.s32 {q12, q13}, [r2], r5 @store 6th row
-
-@----------------------------------7th and 8th row
- vmull.s16 q11, d31, d15 @scale 8th row second 4 elms
-
- vst1.s32 {q8, q9}, [r2], r5 @store 7th row
- vst1.s32 {q10, q11}, [r2], r5 @store 8th row
-
-@----------------------------------done writing
-
- pop {r4-r12, pc} @pop back all variables
-
-
-
-
-
-
diff --git a/common/arm/ih264_resi_trans_quant_a9.s b/common/arm/ih264_resi_trans_quant_a9.s
index caf362e..bb836bd 100755..100644
--- a/common/arm/ih264_resi_trans_quant_a9.s
+++ b/common/arm/ih264_resi_trans_quant_a9.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@*******************************************************************************
@* @file
@* ih264_resi_trans_quant_a9.s
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
index ccae779..33859e6 100755..100644
--- a/common/arm/ih264_weighted_bi_pred_a9q.s
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_weighted_bi_pred_a9q.s
@@ -37,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_weighted_bi_pred_luma_a9q()
@@ -96,7 +96,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_bi_pred_luma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
@@ -411,7 +411,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_bi_pred_chroma_a9q(UWORD8 *pu1_src1,
@ UWORD8 *pu1_src2,
@ UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s
index 1ce94d0..81d26d4 100755..100644
--- a/common/arm/ih264_weighted_pred_a9q.s
+++ b/common/arm/ih264_weighted_pred_a9q.s
@@ -17,7 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
@******************************************************************************
@* @file
@* ih264_weighted_pred_a9q.s
@@ -37,7 +37,7 @@
@* None
@*
@*******************************************************************************
-@*/
+@*
@*******************************************************************************
@* @function
@* ih264_weighted_pred_luma_a9q()
@@ -84,7 +84,7 @@
@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_pred_luma_a9q(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
@@ -314,7 +314,7 @@ end_loops:
@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
@*
@*******************************************************************************
-@*/
+@*
@void ih264_weighted_pred_chroma_a9q(UWORD8 *pu1_src,
@ UWORD8 *pu1_dst,
@ WORD32 src_strd,
diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s
index 3021556..3021556 100755..100644
--- a/common/armv8/ih264_deblk_chroma_av8.s
+++ b/common/armv8/ih264_deblk_chroma_av8.s
diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s
index bcdb03f..bcdb03f 100755..100644
--- a/common/armv8/ih264_deblk_luma_av8.s
+++ b/common/armv8/ih264_deblk_luma_av8.s
diff --git a/common/armv8/ih264_default_weighted_pred_av8.s b/common/armv8/ih264_default_weighted_pred_av8.s
index aefb902..6823015 100755..100644
--- a/common/armv8/ih264_default_weighted_pred_av8.s
+++ b/common/armv8/ih264_default_weighted_pred_av8.s
@@ -24,7 +24,6 @@
//*
//* @brief
//* Contains function definitions for default weighted prediction.
-//* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
//*
//* @author
//* Kaushik Senthoor R
diff --git a/common/armv8/ih264_ihadamard_scaling_av8.s b/common/armv8/ih264_ihadamard_scaling_av8.s
index 712c9ae..712c9ae 100755..100644
--- a/common/armv8/ih264_ihadamard_scaling_av8.s
+++ b/common/armv8/ih264_ihadamard_scaling_av8.s
diff --git a/common/armv8/ih264_inter_pred_chroma_av8.s b/common/armv8/ih264_inter_pred_chroma_av8.s
index 714e271..714e271 100755..100644
--- a/common/armv8/ih264_inter_pred_chroma_av8.s
+++ b/common/armv8/ih264_inter_pred_chroma_av8.s
diff --git a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
index 6ad463a..6ad463a 100755..100644
--- a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
+++ b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
diff --git a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
index 38934c9..9564f99 100755..100644
--- a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
+++ b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
@@ -247,8 +247,8 @@ loop_16: //when wd=16
st1 {v30.2s, v31.2s}, [x1], x3 // store row 6
sqrshrun v30.8b, v28.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
- swp v0.8b v4.8b
- swp v1.8b v5.8b
+ swp v0.8b, v4.8b
+ swp v1.8b, v5.8b
@@ -257,8 +257,8 @@ loop_16: //when wd=16
mov v7.8b, v11.8b
subs x12, x14, #1 // if height==16 - looping
- swp v4.8b v8.8b
- swp v5.8b v9.8b
+ swp v4.8b, v8.8b
+ swp v5.8b, v9.8b
sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
diff --git a/common/armv8/ih264_inter_pred_luma_copy_av8.s b/common/armv8/ih264_inter_pred_luma_copy_av8.s
index 1a76c1c..1a76c1c 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_copy_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_copy_av8.s
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
index ea7645e..202c516 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -68,7 +68,7 @@
ih264_inter_pred_luma_horz_hpel_vert_hpel_av8:
- //store register values to stack
+ //store register values to stack
push_v_regs
stp x19, x20, [sp, #-16]!
@@ -811,7 +811,7 @@ loop_4:
bgt loop_4
end_func:
- //Restoring registers from stack
+ //Restoring registers from stack
ldp x19, x20, [sp], #16
pop_v_regs
ret
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
index 3737e3f..38f971b 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
@@ -1111,7 +1111,7 @@ loop_4:
bgt loop_4
end_func:
- //Restoring registers from stack
+ //Restoring registers from stack
ldp x19, x20, [sp], #16
pop_v_regs
ret
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
index 39e3253..39e3253 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
index b1e4866..b1e4866 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
index ab663d0..ab663d0 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
diff --git a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
index 9d19a2d..9d19a2d 100755..100644
--- a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s
index 62edfdc..2c5efb3 100755..100644
--- a/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -262,7 +262,7 @@ ih264_intra_pred_chroma_8x8_mode_horz_av8:
- push_v_regs
+ push_v_regs
ld1 {v0.8h}, [x0]
dup v10.8h, v0.h[7]
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index a9eb165..a9eb165 100755..100644
--- a/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
diff --git a/common/armv8/ih264_intra_pred_luma_4x4_av8.s b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
index 62e8cee..62e8cee 100755..100644
--- a/common/armv8/ih264_intra_pred_luma_4x4_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index 2b972ca..2b972ca 100755..100644
--- a/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
diff --git a/common/armv8/ih264_iquant_itrans_recon_av8.s b/common/armv8/ih264_iquant_itrans_recon_av8.s
index 4c83036..4c83036 100755..100644
--- a/common/armv8/ih264_iquant_itrans_recon_av8.s
+++ b/common/armv8/ih264_iquant_itrans_recon_av8.s
diff --git a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
index 8bb9c32..8bb9c32 100755..100644
--- a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
+++ b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
diff --git a/common/armv8/ih264_mem_fns_neon_av8.s b/common/armv8/ih264_mem_fns_neon_av8.s
index f5c2e29..f5c2e29 100755..100644
--- a/common/armv8/ih264_mem_fns_neon_av8.s
+++ b/common/armv8/ih264_mem_fns_neon_av8.s
diff --git a/common/armv8/ih264_neon_macros.s b/common/armv8/ih264_neon_macros.s
index 6ff5b91..6ff5b91 100755..100644
--- a/common/armv8/ih264_neon_macros.s
+++ b/common/armv8/ih264_neon_macros.s
diff --git a/common/armv8/ih264_padding_neon_av8.s b/common/armv8/ih264_padding_neon_av8.s
index 35d9c8a..35d9c8a 100755..100644
--- a/common/armv8/ih264_padding_neon_av8.s
+++ b/common/armv8/ih264_padding_neon_av8.s
diff --git a/common/armv8/ih264_platform_macros.h b/common/armv8/ih264_platform_macros.h
index 1f67403..1f67403 100755..100644
--- a/common/armv8/ih264_platform_macros.h
+++ b/common/armv8/ih264_platform_macros.h
diff --git a/common/armv8/ih264_resi_trans_quant_av8.s b/common/armv8/ih264_resi_trans_quant_av8.s
index dc1c680..dc1c680 100755..100644
--- a/common/armv8/ih264_resi_trans_quant_av8.s
+++ b/common/armv8/ih264_resi_trans_quant_av8.s
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
index f7d0846..96ef50a 100755..100644
--- a/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -24,7 +24,6 @@
//*
//* @brief
//* Contains function definitions for weighted biprediction.
-//* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
//*
//* @author
//* Kaushik Senthoor R
diff --git a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
index 6a03875..ec5bb7a 100755..100644
--- a/common/armv8/ih264_weighted_pred_av8.s
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -24,7 +24,6 @@
//*
//* @brief
//* Contains function definitions for weighted prediction.
-//* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
//*
//* @author
//* Kaushik Senthoor R
diff --git a/common/ih264_buf_mgr.c b/common/ih264_buf_mgr.c
index ea4333e..ea4333e 100755..100644
--- a/common/ih264_buf_mgr.c
+++ b/common/ih264_buf_mgr.c
diff --git a/common/ih264_buf_mgr.h b/common/ih264_buf_mgr.h
index 52efa70..52efa70 100755..100644
--- a/common/ih264_buf_mgr.h
+++ b/common/ih264_buf_mgr.h
diff --git a/common/ih264_cabac_tables.c b/common/ih264_cabac_tables.c
index 118ca12..118ca12 100755..100644
--- a/common/ih264_cabac_tables.c
+++ b/common/ih264_cabac_tables.c
diff --git a/common/ih264_cabac_tables.h b/common/ih264_cabac_tables.h
index 0cef51e..0cef51e 100755..100644
--- a/common/ih264_cabac_tables.h
+++ b/common/ih264_cabac_tables.h
diff --git a/common/ih264_cavlc_tables.c b/common/ih264_cavlc_tables.c
index f122ab9..f122ab9 100755..100644
--- a/common/ih264_cavlc_tables.c
+++ b/common/ih264_cavlc_tables.c
diff --git a/common/ih264_cavlc_tables.h b/common/ih264_cavlc_tables.h
index 78057b5..78057b5 100755..100644
--- a/common/ih264_cavlc_tables.h
+++ b/common/ih264_cavlc_tables.h
diff --git a/common/ih264_chroma_intra_pred_filters.c b/common/ih264_chroma_intra_pred_filters.c
index ee145e5..ee145e5 100755..100644
--- a/common/ih264_chroma_intra_pred_filters.c
+++ b/common/ih264_chroma_intra_pred_filters.c
diff --git a/common/ih264_common_tables.c b/common/ih264_common_tables.c
index c53c276..c53c276 100755..100644
--- a/common/ih264_common_tables.c
+++ b/common/ih264_common_tables.c
diff --git a/common/ih264_common_tables.h b/common/ih264_common_tables.h
index 3127a2c..3127a2c 100755..100644
--- a/common/ih264_common_tables.h
+++ b/common/ih264_common_tables.h
diff --git a/common/ih264_deblk_edge_filters.c b/common/ih264_deblk_edge_filters.c
index d2ffefd..d2ffefd 100755..100644
--- a/common/ih264_deblk_edge_filters.c
+++ b/common/ih264_deblk_edge_filters.c
diff --git a/common/ih264_deblk_edge_filters.h b/common/ih264_deblk_edge_filters.h
index 4079dd2..4079dd2 100755..100644
--- a/common/ih264_deblk_edge_filters.h
+++ b/common/ih264_deblk_edge_filters.h
diff --git a/common/ih264_deblk_tables.c b/common/ih264_deblk_tables.c
index 91e28e0..91e28e0 100755..100644
--- a/common/ih264_deblk_tables.c
+++ b/common/ih264_deblk_tables.c
diff --git a/common/ih264_deblk_tables.h b/common/ih264_deblk_tables.h
index 3935dcb..3935dcb 100755..100644
--- a/common/ih264_deblk_tables.h
+++ b/common/ih264_deblk_tables.h
diff --git a/common/ih264_debug.h b/common/ih264_debug.h
index 96ff2a7..96ff2a7 100755..100644
--- a/common/ih264_debug.h
+++ b/common/ih264_debug.h
diff --git a/common/ih264_defs.h b/common/ih264_defs.h
index 8d7e387..8d7e387 100755..100644
--- a/common/ih264_defs.h
+++ b/common/ih264_defs.h
diff --git a/common/ih264_disp_mgr.c b/common/ih264_disp_mgr.c
index 2bdb524..2bdb524 100755..100644
--- a/common/ih264_disp_mgr.c
+++ b/common/ih264_disp_mgr.c
diff --git a/common/ih264_disp_mgr.h b/common/ih264_disp_mgr.h
index 6f56493..6f56493 100755..100644
--- a/common/ih264_disp_mgr.h
+++ b/common/ih264_disp_mgr.h
diff --git a/common/ih264_dpb_mgr.c b/common/ih264_dpb_mgr.c
index 8e087d3..9380b7e 100755..100644
--- a/common/ih264_dpb_mgr.c
+++ b/common/ih264_dpb_mgr.c
@@ -536,7 +536,7 @@ WORD32 ih264_dpb_mgr_alternate_ref_fields(dpb_mgr_t *ps_dpb_mgr,
BOTTOM_FIELD:TOP_FIELD;
}
- if((reference_type == SHORT_TERM_REF))
+ if(reference_type == SHORT_TERM_REF)
{
ps_dpb_mgr->ps_dpb_short_term_head = ps_dpb_head->ps_prev_dpb;
}
diff --git a/common/ih264_dpb_mgr.h b/common/ih264_dpb_mgr.h
index b0cf0fd..b0cf0fd 100755..100644
--- a/common/ih264_dpb_mgr.h
+++ b/common/ih264_dpb_mgr.h
diff --git a/common/ih264_error.h b/common/ih264_error.h
index ff1662d..ff1662d 100755..100644
--- a/common/ih264_error.h
+++ b/common/ih264_error.h
diff --git a/common/ih264_ihadamard_scaling.c b/common/ih264_ihadamard_scaling.c
index e4729c8..e4729c8 100755..100644
--- a/common/ih264_ihadamard_scaling.c
+++ b/common/ih264_ihadamard_scaling.c
diff --git a/common/ih264_inter_pred_filters.c b/common/ih264_inter_pred_filters.c
index 7d1e407..7d1e407 100755..100644
--- a/common/ih264_inter_pred_filters.c
+++ b/common/ih264_inter_pred_filters.c
diff --git a/common/ih264_inter_pred_filters.h b/common/ih264_inter_pred_filters.h
index c439ab8..c439ab8 100755..100644
--- a/common/ih264_inter_pred_filters.h
+++ b/common/ih264_inter_pred_filters.h
diff --git a/common/ih264_intra_pred_filters.h b/common/ih264_intra_pred_filters.h
index caf6b33..caf6b33 100755..100644
--- a/common/ih264_intra_pred_filters.h
+++ b/common/ih264_intra_pred_filters.h
diff --git a/common/ih264_iquant_itrans_recon.c b/common/ih264_iquant_itrans_recon.c
index 3c14046..3c14046 100755..100644
--- a/common/ih264_iquant_itrans_recon.c
+++ b/common/ih264_iquant_itrans_recon.c
diff --git a/common/ih264_itrans_recon.h b/common/ih264_itrans_recon.h
index fd1f239..fd1f239 100755..100644
--- a/common/ih264_itrans_recon.h
+++ b/common/ih264_itrans_recon.h
diff --git a/common/ih264_list.c b/common/ih264_list.c
index 736b41c..736b41c 100755..100644
--- a/common/ih264_list.c
+++ b/common/ih264_list.c
diff --git a/common/ih264_list.h b/common/ih264_list.h
index fc59d95..fc59d95 100755..100644
--- a/common/ih264_list.h
+++ b/common/ih264_list.h
diff --git a/common/ih264_luma_intra_pred_filters.c b/common/ih264_luma_intra_pred_filters.c
index 4a5b143..4a5b143 100755..100644
--- a/common/ih264_luma_intra_pred_filters.c
+++ b/common/ih264_luma_intra_pred_filters.c
diff --git a/common/ih264_macros.h b/common/ih264_macros.h
index 969012f..969012f 100755..100644
--- a/common/ih264_macros.h
+++ b/common/ih264_macros.h
diff --git a/common/ih264_mem_fns.c b/common/ih264_mem_fns.c
index 1c1f328..1c1f328 100755..100644
--- a/common/ih264_mem_fns.c
+++ b/common/ih264_mem_fns.c
diff --git a/common/ih264_mem_fns.h b/common/ih264_mem_fns.h
index e0167f4..e0167f4 100755..100644
--- a/common/ih264_mem_fns.h
+++ b/common/ih264_mem_fns.h
diff --git a/common/ih264_padding.c b/common/ih264_padding.c
index 8e8f3e2..8e8f3e2 100755..100644
--- a/common/ih264_padding.c
+++ b/common/ih264_padding.c
diff --git a/common/ih264_padding.h b/common/ih264_padding.h
index e4e18fb..e4e18fb 100755..100644
--- a/common/ih264_padding.h
+++ b/common/ih264_padding.h
diff --git a/common/ih264_resi_trans.h b/common/ih264_resi_trans.h
index ee0add3..ee0add3 100755..100644
--- a/common/ih264_resi_trans.h
+++ b/common/ih264_resi_trans.h
diff --git a/common/ih264_resi_trans_quant.c b/common/ih264_resi_trans_quant.c
index cf1d43c..cf1d43c 100755..100644
--- a/common/ih264_resi_trans_quant.c
+++ b/common/ih264_resi_trans_quant.c
diff --git a/common/ih264_size_defs.h b/common/ih264_size_defs.h
index e2a8b76..e2a8b76 100755..100644
--- a/common/ih264_size_defs.h
+++ b/common/ih264_size_defs.h
diff --git a/common/ih264_structs.h b/common/ih264_structs.h
index fa4e142..fa4e142 100755..100644
--- a/common/ih264_structs.h
+++ b/common/ih264_structs.h
diff --git a/common/ih264_trans_data.c b/common/ih264_trans_data.c
index a1231e6..a1231e6 100755..100644
--- a/common/ih264_trans_data.c
+++ b/common/ih264_trans_data.c
diff --git a/common/ih264_trans_data.h b/common/ih264_trans_data.h
index dc77ae7..dc77ae7 100755..100644
--- a/common/ih264_trans_data.h
+++ b/common/ih264_trans_data.h
diff --git a/common/ih264_trans_macros.h b/common/ih264_trans_macros.h
index f114d0e..f114d0e 100755..100644
--- a/common/ih264_trans_macros.h
+++ b/common/ih264_trans_macros.h
diff --git a/common/ih264_trans_quant_itrans_iquant.h b/common/ih264_trans_quant_itrans_iquant.h
index 83551aa..83551aa 100755..100644
--- a/common/ih264_trans_quant_itrans_iquant.h
+++ b/common/ih264_trans_quant_itrans_iquant.h
diff --git a/common/ih264_typedefs.h b/common/ih264_typedefs.h
index 8e4685a..8e4685a 100755..100644
--- a/common/ih264_typedefs.h
+++ b/common/ih264_typedefs.h
diff --git a/common/ih264_weighted_pred.c b/common/ih264_weighted_pred.c
index d5d73f2..d5d73f2 100755..100644
--- a/common/ih264_weighted_pred.c
+++ b/common/ih264_weighted_pred.c
diff --git a/common/ih264_weighted_pred.h b/common/ih264_weighted_pred.h
index f9b93b0..f9b93b0 100755..100644
--- a/common/ih264_weighted_pred.h
+++ b/common/ih264_weighted_pred.h
diff --git a/common/ithread.c b/common/ithread.c
index 4ffb98a..25a8cd0 100755..100644
--- a/common/ithread.c
+++ b/common/ithread.c
@@ -327,6 +327,11 @@ WORD32 ithread_set_affinity(WORD32 core_id)
return 1;
}
+void ithread_set_name(CHAR *pc_thread_name)
+{
+ return; /* stub: returns immediately; pc_thread_name is accepted but never read */
+}
+
#else
UWORD32 ithread_get_handle_size(void)
diff --git a/common/ithread.h b/common/ithread.h
index f926f83..f926f83 100755..100644
--- a/common/ithread.h
+++ b/common/ithread.h
diff --git a/common/mips/ih264_platform_macros.h b/common/mips/ih264_platform_macros.h
index d098372..d098372 100755..100644
--- a/common/mips/ih264_platform_macros.h
+++ b/common/mips/ih264_platform_macros.h
diff --git a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
index 45101a4..45101a4 100755..100644
--- a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
+++ b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
diff --git a/common/x86/ih264_deblk_chroma_ssse3.c b/common/x86/ih264_deblk_chroma_ssse3.c
index a36447a..a36447a 100755..100644
--- a/common/x86/ih264_deblk_chroma_ssse3.c
+++ b/common/x86/ih264_deblk_chroma_ssse3.c
diff --git a/common/x86/ih264_deblk_luma_ssse3.c b/common/x86/ih264_deblk_luma_ssse3.c
index 440d5f0..e29bebb 100755..100644
--- a/common/x86/ih264_deblk_luma_ssse3.c
+++ b/common/x86/ih264_deblk_luma_ssse3.c
@@ -856,7 +856,7 @@ void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src,
{
UWORD8 u1_Bs, u1_Bs1;
- UWORD32 j = 0;
+ WORD32 j = 0;
__m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
__m128i int1, int2, int3, int4, high1, high2;
diff --git a/common/x86/ih264_ihadamard_scaling_sse42.c b/common/x86/ih264_ihadamard_scaling_sse42.c
index 895291b..d68d105 100755..100644
--- a/common/x86/ih264_ihadamard_scaling_sse42.c
+++ b/common/x86/ih264_ihadamard_scaling_sse42.c
@@ -86,14 +86,19 @@
*
*******************************************************************************
*/
-void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out,
- const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
- UWORD32 u4_qp_div_6, WORD32* pi4_tmp) {
+void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src,
+ WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD32* pi4_tmp)
+{
__m128i src_r0_r1, src_r2_r3;
__m128i src_r0, src_r1, src_r2, src_r3;
__m128i temp0, temp1, temp2, temp3;
__m128i add_rshift = _mm_set1_epi32((1 << (5 - u4_qp_div_6)));
__m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]);
+ UNUSED (pi4_tmp);
src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
@@ -171,12 +176,15 @@ void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out,
src_r3 = _mm_mullo_epi32(src_r3, mult_val);
//Scaling
- if (u4_qp_div_6 >= 6) {
+ if(u4_qp_div_6 >= 6)
+ {
src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6);
src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6);
src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6);
src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6);
- } else {
+ }
+ else
+ {
temp0 = _mm_add_epi32(src_r0, add_rshift);
temp1 = _mm_add_epi32(src_r1, add_rshift);
temp2 = _mm_add_epi32(src_r2, add_rshift);
@@ -194,16 +202,17 @@ void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out,
}
void ih264_ihadamard_scaling_2x2_uv_sse42(WORD16* pi2_src,
- WORD16* pi2_out,
- const UWORD16 *pu2_iscal_mat,
- const UWORD16 *pu2_weigh_mat,
- UWORD32 u4_qp_div_6,
- WORD32* pi4_tmp)
+ WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD32* pi4_tmp)
{
- UNUSED(pi4_tmp);
__m128i src, plane_0, plane_1, temp0, temp1, sign_reg;
__m128i zero_8x16b = _mm_setzero_si128();
__m128i scale_val = _mm_set1_epi32((WORD32)(pu2_iscal_mat[0] * pu2_weigh_mat[0]));
+ UNUSED(pi4_tmp);
+
src = _mm_loadu_si128((__m128i *) pi2_src); //a0 a1 a2 a3 b0 b1 b2 b3
sign_reg = _mm_cmpgt_epi16(zero_8x16b, src);
plane_0 = _mm_unpacklo_epi16(src, sign_reg); //a0 a1 a2 a3 -- 32 bits
diff --git a/common/x86/ih264_ihadamard_scaling_ssse3.c b/common/x86/ih264_ihadamard_scaling_ssse3.c
index 232d9fa..1b940ea 100755..100644
--- a/common/x86/ih264_ihadamard_scaling_ssse3.c
+++ b/common/x86/ih264_ihadamard_scaling_ssse3.c
@@ -85,9 +85,13 @@
*
*******************************************************************************
*/
-void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out,
- const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
- UWORD32 u4_qp_div_6, WORD32* pi4_tmp) {
+void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src,
+ WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD32* pi4_tmp)
+{
int val = 0xFFFF;
__m128i src_r0_r1, src_r2_r3, sign_reg, zero_8x16b = _mm_setzero_si128();
__m128i src_r0, src_r1, src_r2, src_r3;
@@ -96,6 +100,8 @@ void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out,
__m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]);
__m128i mask = _mm_set1_epi32(val);
+ UNUSED (pi4_tmp);
+
mult_val = _mm_and_si128(mult_val, mask);
src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
@@ -177,12 +183,15 @@ void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out,
src_r3 = _mm_madd_epi16(src_r3, mult_val);
//Scaling
- if (u4_qp_div_6 >= 6) {
+ if(u4_qp_div_6 >= 6)
+ {
src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6);
src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6);
src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6);
src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6);
- } else {
+ }
+ else
+ {
temp0 = _mm_add_epi32(src_r0, add_rshift);
temp1 = _mm_add_epi32(src_r1, add_rshift);
temp2 = _mm_add_epi32(src_r2, add_rshift);
diff --git a/common/x86/ih264_inter_pred_filters_ssse3.c b/common/x86/ih264_inter_pred_filters_ssse3.c
index 64e364e..6d318c9 100755..100644
--- a/common/x86/ih264_inter_pred_filters_ssse3.c
+++ b/common/x86/ih264_inter_pred_filters_ssse3.c
@@ -98,11 +98,10 @@ void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src,
{
__m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b;
+ WORD32 src_strd2, src_strd3, src_strd4, dst_strd2, dst_strd3, dst_strd4;
UNUSED(pu1_tmp);
UNUSED(dydx);
- WORD32 src_strd2, src_strd3, src_strd4, dst_strd2, dst_strd3, dst_strd4;
-
src_strd2 = src_strd << 1;
dst_strd2 = dst_strd << 1;
src_strd4 = src_strd << 2;
@@ -1825,7 +1824,6 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
WORD32 y_offset;
UWORD8 *pu1_pred1;
- UNUSED(pu1_tmp);
__m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
__m128i src_r5_16x8b, src_r6_16x8b;
@@ -1835,6 +1833,7 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
__m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
__m128i const_val16_8x16b;
+ UNUSED(pu1_tmp);
y_offset = dydx & 0xf;
coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
diff --git a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
index d43c8e2..565cc75 100755..100644
--- a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
+++ b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
@@ -113,6 +113,8 @@ void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src,
UWORD32 *pu4_out = (UWORD32 *)pu1_out;
WORD32 q0 = pi2_src[0];
WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+ UNUSED (pi2_tmp);
+
INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
if (iq_start_idx != 0 )
@@ -233,6 +235,10 @@ void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src,
{
WORD32 q0 = pi2_src[0];
WORD16 i_macro, rnd_fact = (qp_div < 6) ? 1 << (5 - qp_div) : 0;
+ UNUSED (pi2_tmp);
+ UNUSED (iq_start_idx);
+ UNUSED (pi2_dc_ld_addr);
+
INV_QUANT(q0, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
i_macro = ((q0 + 32) >> 6);
@@ -392,6 +398,12 @@ void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src,
__m128i chroma_mask = _mm_set1_epi16 (0xFF);
__m128i value_add = _mm_set1_epi16(i_macro);
+ UNUSED (pi2_src);
+ UNUSED (pu2_iscal_mat);
+ UNUSED (pu2_weigh_mat);
+ UNUSED (u4_qp_div_6);
+ UNUSED (pi2_tmp);
+
//Load pred buffer
pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c
index 2a4ea3f..6399b65 100755..100644
--- a/common/x86/ih264_iquant_itrans_recon_sse42.c
+++ b/common/x86/ih264_iquant_itrans_recon_sse42.c
@@ -120,6 +120,7 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src,
__m128i resq_r0, resq_r1, resq_r2, resq_r3;
__m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6)));
__m128i value_32 = _mm_set1_epi32(32);
+ UNUSED (pi2_tmp);
/*************************************************************/
/* Dequantization of coefficients. Will be replaced by SIMD */
@@ -369,6 +370,8 @@ void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src,
__m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6)));
__m128i value_32 = _mm_set1_epi32(32);
__m128i chroma_mask = _mm_set1_epi16 (0xFF);
+ UNUSED (pi2_tmp);
+
/*************************************************************/
/* Dequantization of coefficients. Will be replaced by SIMD */
/* operations on platform */
diff --git a/common/x86/ih264_iquant_itrans_recon_ssse3.c b/common/x86/ih264_iquant_itrans_recon_ssse3.c
index ca1397e..388cafe 100755..100644
--- a/common/x86/ih264_iquant_itrans_recon_ssse3.c
+++ b/common/x86/ih264_iquant_itrans_recon_ssse3.c
@@ -120,6 +120,8 @@ void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src,
__m128i resq_r0, resq_r1, resq_r2, resq_r3;
__m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6)));
__m128i value_32 = _mm_set1_epi32(32);
+ UNUSED (pi2_tmp);
+ UNUSED (pi2_dc_ld_addr);
/*************************************************************/
/* Dequantization of coefficients. Will be replaced by SIMD */
@@ -397,6 +399,9 @@ void ih264_iquant_itrans_recon_8x8_ssse3(WORD16 *pi2_src,
__m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2,
resq_r3_1, resq_r3_2, resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2,
resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2;
+ UNUSED (pi2_tmp);
+ UNUSED (iq_start_idx);
+ UNUSED (pi2_dc_ld_addr);
/*************************************************************/
/* Dequantization of coefficients. Will be replaced by SIMD */
diff --git a/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
index 5a35372..5a35372 100755..100644
--- a/common/x86/ih264_luma_intra_pred_filters_ssse3.c
+++ b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
diff --git a/common/x86/ih264_mem_fns_ssse3.c b/common/x86/ih264_mem_fns_ssse3.c
index 8ca1f3e..8ca1f3e 100755..100644
--- a/common/x86/ih264_mem_fns_ssse3.c
+++ b/common/x86/ih264_mem_fns_ssse3.c
diff --git a/common/x86/ih264_padding_ssse3.c b/common/x86/ih264_padding_ssse3.c
index 6dadd39..6dadd39 100755..100644
--- a/common/x86/ih264_padding_ssse3.c
+++ b/common/x86/ih264_padding_ssse3.c
diff --git a/common/x86/ih264_platform_macros.h b/common/x86/ih264_platform_macros.h
index e4b9821..e4b9821 100755..100644
--- a/common/x86/ih264_platform_macros.h
+++ b/common/x86/ih264_platform_macros.h
diff --git a/common/x86/ih264_resi_trans_quant_sse42.c b/common/x86/ih264_resi_trans_quant_sse42.c
index c267651..eca43ed 100755..100644
--- a/common/x86/ih264_resi_trans_quant_sse42.c
+++ b/common/x86/ih264_resi_trans_quant_sse42.c
@@ -121,6 +121,9 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred,
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
__m128i sign_reg0, sign_reg2;
__m128i scalemat_r0_r1, scalemat_r2_r3;
+
+ UNUSED (pu2_threshold_matrix);
+
scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //a00 a01 a02 a03 0 0 0 0 0 0 0 0 -- all 8 bits
@@ -394,6 +397,8 @@ void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WO
__m128i scalemat_r0_r1, scalemat_r2_r3;
__m128i chroma_mask = _mm_set1_epi16 (0xFF);
+ UNUSED (pu2_threshold_matrix);
+
scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //a00 a01 a02 a03 0 0 0 0 0 0 0 0 -- all 8 bits
@@ -676,6 +681,8 @@ void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
__m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
__m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]);
+ UNUSED (pu2_threshold_matrix);
+
src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1);
@@ -902,6 +909,8 @@ void ih264_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
__m128i temp_1 = _mm_set1_epi16(1);
__m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
+ UNUSED (pu2_threshold_matrix);
+
src = _mm_loadu_si128((__m128i *)pi2_src); //a0 a1 a2 a3 b0 b1 b2 b3
sign_reg = _mm_cmpgt_epi16(zero_8x16b, src);
plane_0 = _mm_unpacklo_epi16(src, sign_reg); //a0 a1 a2 a3 -- 32 bits
diff --git a/common/x86/ih264_weighted_pred_sse42.c b/common/x86/ih264_weighted_pred_sse42.c
index b1684b7..b1684b7 100755..100644
--- a/common/x86/ih264_weighted_pred_sse42.c
+++ b/common/x86/ih264_weighted_pred_sse42.c
diff --git a/decoder.arm.mk b/decoder.arm.mk
index 556e838..556e838 100755..100644
--- a/decoder.arm.mk
+++ b/decoder.arm.mk
diff --git a/decoder.arm64.mk b/decoder.arm64.mk
index ddf0ad2..ddf0ad2 100755..100644
--- a/decoder.arm64.mk
+++ b/decoder.arm64.mk
diff --git a/decoder.mips.mk b/decoder.mips.mk
index 3d00395..3d00395 100755..100644
--- a/decoder.mips.mk
+++ b/decoder.mips.mk
diff --git a/decoder.mips64.mk b/decoder.mips64.mk
index ffcb882..ffcb882 100755..100644
--- a/decoder.mips64.mk
+++ b/decoder.mips64.mk
diff --git a/decoder.mk b/decoder.mk
index d4fa0be..d4fa0be 100755..100644
--- a/decoder.mk
+++ b/decoder.mk
diff --git a/decoder.x86.mk b/decoder.x86.mk
index 309bc23..309bc23 100755..100644
--- a/decoder.x86.mk
+++ b/decoder.x86.mk
diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk
index 1b018f7..1b018f7 100755..100644
--- a/decoder.x86_64.mk
+++ b/decoder.x86_64.mk
diff --git a/decoder/arm/ih264d_function_selector.c b/decoder/arm/ih264d_function_selector.c
index 925043b..925043b 100755..100644
--- a/decoder/arm/ih264d_function_selector.c
+++ b/decoder/arm/ih264d_function_selector.c
diff --git a/decoder/arm/ih264d_function_selector_a9q.c b/decoder/arm/ih264d_function_selector_a9q.c
index 0cf8581..0cf8581 100755..100644
--- a/decoder/arm/ih264d_function_selector_a9q.c
+++ b/decoder/arm/ih264d_function_selector_a9q.c
diff --git a/decoder/arm/ih264d_function_selector_av8.c b/decoder/arm/ih264d_function_selector_av8.c
index 5715ee0..5715ee0 100755..100644
--- a/decoder/arm/ih264d_function_selector_av8.c
+++ b/decoder/arm/ih264d_function_selector_av8.c
diff --git a/decoder/ih264d.h b/decoder/ih264d.h
index f89e576..f89e576 100755..100644
--- a/decoder/ih264d.h
+++ b/decoder/ih264d.h
diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c
index 67ef5bb..67ef5bb 100755..100644
--- a/decoder/ih264d_api.c
+++ b/decoder/ih264d_api.c
diff --git a/decoder/ih264d_bitstrm.c b/decoder/ih264d_bitstrm.c
index fd41bc6..fd41bc6 100755..100644
--- a/decoder/ih264d_bitstrm.c
+++ b/decoder/ih264d_bitstrm.c
diff --git a/decoder/ih264d_bitstrm.h b/decoder/ih264d_bitstrm.h
index 49cd5e7..49cd5e7 100755..100644
--- a/decoder/ih264d_bitstrm.h
+++ b/decoder/ih264d_bitstrm.h
diff --git a/decoder/ih264d_cabac.c b/decoder/ih264d_cabac.c
index 38028ae..38028ae 100755..100644
--- a/decoder/ih264d_cabac.c
+++ b/decoder/ih264d_cabac.c
diff --git a/decoder/ih264d_cabac.h b/decoder/ih264d_cabac.h
index 6ee3d52..6ee3d52 100755..100644
--- a/decoder/ih264d_cabac.h
+++ b/decoder/ih264d_cabac.h
diff --git a/decoder/ih264d_cabac_init_tables.c b/decoder/ih264d_cabac_init_tables.c
index 2c3a55e..2c3a55e 100755..100644
--- a/decoder/ih264d_cabac_init_tables.c
+++ b/decoder/ih264d_cabac_init_tables.c
diff --git a/decoder/ih264d_compute_bs.c b/decoder/ih264d_compute_bs.c
index 4a6750a..4a6750a 100755..100644
--- a/decoder/ih264d_compute_bs.c
+++ b/decoder/ih264d_compute_bs.c
diff --git a/decoder/ih264d_deblocking.c b/decoder/ih264d_deblocking.c
index ad4ce08..ad4ce08 100755..100644
--- a/decoder/ih264d_deblocking.c
+++ b/decoder/ih264d_deblocking.c
diff --git a/decoder/ih264d_deblocking.h b/decoder/ih264d_deblocking.h
index 21601aa..21601aa 100755..100644
--- a/decoder/ih264d_deblocking.h
+++ b/decoder/ih264d_deblocking.h
diff --git a/decoder/ih264d_debug.c b/decoder/ih264d_debug.c
index 5650e20..5650e20 100755..100644
--- a/decoder/ih264d_debug.c
+++ b/decoder/ih264d_debug.c
diff --git a/decoder/ih264d_debug.h b/decoder/ih264d_debug.h
index 787b697..787b697 100755..100644
--- a/decoder/ih264d_debug.h
+++ b/decoder/ih264d_debug.h
diff --git a/decoder/ih264d_defs.h b/decoder/ih264d_defs.h
index 3f8bc58..3f8bc58 100755..100644
--- a/decoder/ih264d_defs.h
+++ b/decoder/ih264d_defs.h
diff --git a/decoder/ih264d_dpb_manager.h b/decoder/ih264d_dpb_manager.h
index a9539c8..a9539c8 100755..100644
--- a/decoder/ih264d_dpb_manager.h
+++ b/decoder/ih264d_dpb_manager.h
diff --git a/decoder/ih264d_dpb_mgr.c b/decoder/ih264d_dpb_mgr.c
index 205bc9b..205bc9b 100755..100644
--- a/decoder/ih264d_dpb_mgr.c
+++ b/decoder/ih264d_dpb_mgr.c
diff --git a/decoder/ih264d_error_handler.h b/decoder/ih264d_error_handler.h
index 20c0f89..20c0f89 100755..100644
--- a/decoder/ih264d_error_handler.h
+++ b/decoder/ih264d_error_handler.h
diff --git a/decoder/ih264d_format_conv.c b/decoder/ih264d_format_conv.c
index 9a8494e..9a8494e 100755..100644
--- a/decoder/ih264d_format_conv.c
+++ b/decoder/ih264d_format_conv.c
diff --git a/decoder/ih264d_format_conv.h b/decoder/ih264d_format_conv.h
index 81a8a0f..81a8a0f 100755..100644
--- a/decoder/ih264d_format_conv.h
+++ b/decoder/ih264d_format_conv.h
diff --git a/decoder/ih264d_function_selector.h b/decoder/ih264d_function_selector.h
index 92ad959..92ad959 100755..100644
--- a/decoder/ih264d_function_selector.h
+++ b/decoder/ih264d_function_selector.h
diff --git a/decoder/ih264d_function_selector_generic.c b/decoder/ih264d_function_selector_generic.c
index 48956ef..48956ef 100755..100644
--- a/decoder/ih264d_function_selector_generic.c
+++ b/decoder/ih264d_function_selector_generic.c
diff --git a/decoder/ih264d_inter_pred.c b/decoder/ih264d_inter_pred.c
index fa818b5..fa818b5 100755..100644
--- a/decoder/ih264d_inter_pred.c
+++ b/decoder/ih264d_inter_pred.c
diff --git a/decoder/ih264d_inter_pred.h b/decoder/ih264d_inter_pred.h
index 52d648a..52d648a 100755..100644
--- a/decoder/ih264d_inter_pred.h
+++ b/decoder/ih264d_inter_pred.h
diff --git a/decoder/ih264d_mb_utils.c b/decoder/ih264d_mb_utils.c
index 4cbfca5..4cbfca5 100755..100644
--- a/decoder/ih264d_mb_utils.c
+++ b/decoder/ih264d_mb_utils.c
diff --git a/decoder/ih264d_mb_utils.h b/decoder/ih264d_mb_utils.h
index 6e359f5..6e359f5 100755..100644
--- a/decoder/ih264d_mb_utils.h
+++ b/decoder/ih264d_mb_utils.h
diff --git a/decoder/ih264d_mem_request.h b/decoder/ih264d_mem_request.h
index 3c60c72..3c60c72 100755..100644
--- a/decoder/ih264d_mem_request.h
+++ b/decoder/ih264d_mem_request.h
diff --git a/decoder/ih264d_mvpred.c b/decoder/ih264d_mvpred.c
index fb4932f..fb4932f 100755..100644
--- a/decoder/ih264d_mvpred.c
+++ b/decoder/ih264d_mvpred.c
diff --git a/decoder/ih264d_mvpred.h b/decoder/ih264d_mvpred.h
index 66366ca..66366ca 100755..100644
--- a/decoder/ih264d_mvpred.h
+++ b/decoder/ih264d_mvpred.h
diff --git a/decoder/ih264d_nal.c b/decoder/ih264d_nal.c
index 48450c8..48450c8 100755..100644
--- a/decoder/ih264d_nal.c
+++ b/decoder/ih264d_nal.c
diff --git a/decoder/ih264d_nal.h b/decoder/ih264d_nal.h
index 3778881..3778881 100755..100644
--- a/decoder/ih264d_nal.h
+++ b/decoder/ih264d_nal.h
diff --git a/decoder/ih264d_parse_bslice.c b/decoder/ih264d_parse_bslice.c
index 89cf5ed..89cf5ed 100755..100644
--- a/decoder/ih264d_parse_bslice.c
+++ b/decoder/ih264d_parse_bslice.c
diff --git a/decoder/ih264d_parse_cabac.c b/decoder/ih264d_parse_cabac.c
index 9d58f33..9d58f33 100755..100644
--- a/decoder/ih264d_parse_cabac.c
+++ b/decoder/ih264d_parse_cabac.c
diff --git a/decoder/ih264d_parse_cabac.h b/decoder/ih264d_parse_cabac.h
index eb66e8c..eb66e8c 100755..100644
--- a/decoder/ih264d_parse_cabac.h
+++ b/decoder/ih264d_parse_cabac.h
diff --git a/decoder/ih264d_parse_cavlc.c b/decoder/ih264d_parse_cavlc.c
index a3f345c..a3f345c 100755..100644
--- a/decoder/ih264d_parse_cavlc.c
+++ b/decoder/ih264d_parse_cavlc.c
diff --git a/decoder/ih264d_parse_cavlc.h b/decoder/ih264d_parse_cavlc.h
index 06105a3..06105a3 100755..100644
--- a/decoder/ih264d_parse_cavlc.h
+++ b/decoder/ih264d_parse_cavlc.h
diff --git a/decoder/ih264d_parse_headers.c b/decoder/ih264d_parse_headers.c
index de5bcb9..de5bcb9 100755..100644
--- a/decoder/ih264d_parse_headers.c
+++ b/decoder/ih264d_parse_headers.c
diff --git a/decoder/ih264d_parse_headers.h b/decoder/ih264d_parse_headers.h
index 3c829e7..3c829e7 100755..100644
--- a/decoder/ih264d_parse_headers.h
+++ b/decoder/ih264d_parse_headers.h
diff --git a/decoder/ih264d_parse_islice.c b/decoder/ih264d_parse_islice.c
index 7851a0b..7851a0b 100755..100644
--- a/decoder/ih264d_parse_islice.c
+++ b/decoder/ih264d_parse_islice.c
diff --git a/decoder/ih264d_parse_islice.h b/decoder/ih264d_parse_islice.h
index 6a43d7b..6a43d7b 100755..100644
--- a/decoder/ih264d_parse_islice.h
+++ b/decoder/ih264d_parse_islice.h
diff --git a/decoder/ih264d_parse_mb_header.c b/decoder/ih264d_parse_mb_header.c
index f30ad67..f30ad67 100755..100644
--- a/decoder/ih264d_parse_mb_header.c
+++ b/decoder/ih264d_parse_mb_header.c
diff --git a/decoder/ih264d_parse_mb_header.h b/decoder/ih264d_parse_mb_header.h
index 63067b9..63067b9 100755..100644
--- a/decoder/ih264d_parse_mb_header.h
+++ b/decoder/ih264d_parse_mb_header.h
diff --git a/decoder/ih264d_parse_pslice.c b/decoder/ih264d_parse_pslice.c
index 67d1405..67d1405 100755..100644
--- a/decoder/ih264d_parse_pslice.c
+++ b/decoder/ih264d_parse_pslice.c
diff --git a/decoder/ih264d_parse_slice.c b/decoder/ih264d_parse_slice.c
index 323df43..323df43 100755..100644
--- a/decoder/ih264d_parse_slice.c
+++ b/decoder/ih264d_parse_slice.c
diff --git a/decoder/ih264d_parse_slice.h b/decoder/ih264d_parse_slice.h
index cf5f9ce..cf5f9ce 100755..100644
--- a/decoder/ih264d_parse_slice.h
+++ b/decoder/ih264d_parse_slice.h
diff --git a/decoder/ih264d_process_bslice.c b/decoder/ih264d_process_bslice.c
index 69199cf..69199cf 100755..100644
--- a/decoder/ih264d_process_bslice.c
+++ b/decoder/ih264d_process_bslice.c
diff --git a/decoder/ih264d_process_bslice.h b/decoder/ih264d_process_bslice.h
index 5aa76e3..5aa76e3 100755..100644
--- a/decoder/ih264d_process_bslice.h
+++ b/decoder/ih264d_process_bslice.h
diff --git a/decoder/ih264d_process_intra_mb.c b/decoder/ih264d_process_intra_mb.c
index 96006ce..96006ce 100755..100644
--- a/decoder/ih264d_process_intra_mb.c
+++ b/decoder/ih264d_process_intra_mb.c
diff --git a/decoder/ih264d_process_intra_mb.h b/decoder/ih264d_process_intra_mb.h
index 30d7819..30d7819 100755..100644
--- a/decoder/ih264d_process_intra_mb.h
+++ b/decoder/ih264d_process_intra_mb.h
diff --git a/decoder/ih264d_process_pslice.c b/decoder/ih264d_process_pslice.c
index b1230f6..b1230f6 100755..100644
--- a/decoder/ih264d_process_pslice.c
+++ b/decoder/ih264d_process_pslice.c
diff --git a/decoder/ih264d_process_pslice.h b/decoder/ih264d_process_pslice.h
index 8740eb4..8740eb4 100755..100644
--- a/decoder/ih264d_process_pslice.h
+++ b/decoder/ih264d_process_pslice.h
diff --git a/decoder/ih264d_quant_scaling.c b/decoder/ih264d_quant_scaling.c
index fa9aeb5..fa9aeb5 100755..100644
--- a/decoder/ih264d_quant_scaling.c
+++ b/decoder/ih264d_quant_scaling.c
diff --git a/decoder/ih264d_quant_scaling.h b/decoder/ih264d_quant_scaling.h
index d9bd377..d9bd377 100755..100644
--- a/decoder/ih264d_quant_scaling.h
+++ b/decoder/ih264d_quant_scaling.h
diff --git a/decoder/ih264d_sei.c b/decoder/ih264d_sei.c
index 14ffcd6..14ffcd6 100755..100644
--- a/decoder/ih264d_sei.c
+++ b/decoder/ih264d_sei.c
diff --git a/decoder/ih264d_sei.h b/decoder/ih264d_sei.h
index 5033740..5033740 100755..100644
--- a/decoder/ih264d_sei.h
+++ b/decoder/ih264d_sei.h
diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h
index 110f71d..110f71d 100755..100644
--- a/decoder/ih264d_structs.h
+++ b/decoder/ih264d_structs.h
diff --git a/decoder/ih264d_tables.c b/decoder/ih264d_tables.c
index ddca2fb..ddca2fb 100755..100644
--- a/decoder/ih264d_tables.c
+++ b/decoder/ih264d_tables.c
diff --git a/decoder/ih264d_tables.h b/decoder/ih264d_tables.h
index 04dfbd0..04dfbd0 100755..100644
--- a/decoder/ih264d_tables.h
+++ b/decoder/ih264d_tables.h
diff --git a/decoder/ih264d_thread_compute_bs.c b/decoder/ih264d_thread_compute_bs.c
index 6812d57..6812d57 100755..100644
--- a/decoder/ih264d_thread_compute_bs.c
+++ b/decoder/ih264d_thread_compute_bs.c
diff --git a/decoder/ih264d_thread_compute_bs.h b/decoder/ih264d_thread_compute_bs.h
index 1bef07f..1bef07f 100755..100644
--- a/decoder/ih264d_thread_compute_bs.h
+++ b/decoder/ih264d_thread_compute_bs.h
diff --git a/decoder/ih264d_thread_parse_decode.c b/decoder/ih264d_thread_parse_decode.c
index be3cb01..1c9eb68 100755..100644
--- a/decoder/ih264d_thread_parse_decode.c
+++ b/decoder/ih264d_thread_parse_decode.c
@@ -582,13 +582,9 @@ WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec /* Decoder parameters */
void ih264d_decode_picture_thread(dec_struct_t *ps_dec )
{
- volatile WORD32 i4_err_status;
-
ithread_set_name("ih264d_decode_picture_thread");
-
-
// run the loop till all slices are decoded
while(1)
@@ -644,13 +640,6 @@ void ih264d_decode_picture_thread(dec_struct_t *ps_dec )
DEBUG_THREADS_PRINTF("Waiting for next slice or end of frame\n");
NOP(32);
- if(i4_err_status != 0)
- {
- /*In the case of error set decode Mb number ,so that the
- parse thread does not wait because of mb difference being
- greated the 32*/
- ps_dec->cur_dec_mb_num = ps_dec->u2_cur_mb_addr - 1;
- }
}
DEBUG_THREADS_PRINTF("Got next slice/end of frame signal \n ");
diff --git a/decoder/ih264d_thread_parse_decode.h b/decoder/ih264d_thread_parse_decode.h
index 013b14f..013b14f 100755..100644
--- a/decoder/ih264d_thread_parse_decode.h
+++ b/decoder/ih264d_thread_parse_decode.h
diff --git a/decoder/ih264d_transfer_address.h b/decoder/ih264d_transfer_address.h
index aa64b85..aa64b85 100755..100644
--- a/decoder/ih264d_transfer_address.h
+++ b/decoder/ih264d_transfer_address.h
diff --git a/decoder/ih264d_utils.c b/decoder/ih264d_utils.c
index f60d99c..f60d99c 100755..100644
--- a/decoder/ih264d_utils.c
+++ b/decoder/ih264d_utils.c
diff --git a/decoder/ih264d_utils.h b/decoder/ih264d_utils.h
index a1a64d5..a1a64d5 100755..100644
--- a/decoder/ih264d_utils.h
+++ b/decoder/ih264d_utils.h
diff --git a/decoder/ih264d_vui.c b/decoder/ih264d_vui.c
index 87276bd..87276bd 100755..100644
--- a/decoder/ih264d_vui.c
+++ b/decoder/ih264d_vui.c
diff --git a/decoder/ih264d_vui.h b/decoder/ih264d_vui.h
index e380a5b..e380a5b 100755..100644
--- a/decoder/ih264d_vui.h
+++ b/decoder/ih264d_vui.h
diff --git a/decoder/iv.h b/decoder/iv.h
index 3a2ebf5..3a2ebf5 100755..100644
--- a/decoder/iv.h
+++ b/decoder/iv.h
diff --git a/decoder/ivd.h b/decoder/ivd.h
index 955b81f..955b81f 100755..100644
--- a/decoder/ivd.h
+++ b/decoder/ivd.h
diff --git a/decoder/mips/ih264d_function_selector.c b/decoder/mips/ih264d_function_selector.c
index 13680ed..13680ed 100755..100644
--- a/decoder/mips/ih264d_function_selector.c
+++ b/decoder/mips/ih264d_function_selector.c
diff --git a/decoder/x86/ih264d_function_selector.c b/decoder/x86/ih264d_function_selector.c
index 9fc5c39..9fc5c39 100755..100644
--- a/decoder/x86/ih264d_function_selector.c
+++ b/decoder/x86/ih264d_function_selector.c
diff --git a/decoder/x86/ih264d_function_selector_sse42.c b/decoder/x86/ih264d_function_selector_sse42.c
index 0c493d2..0c493d2 100755..100644
--- a/decoder/x86/ih264d_function_selector_sse42.c
+++ b/decoder/x86/ih264d_function_selector_sse42.c
diff --git a/decoder/x86/ih264d_function_selector_ssse3.c b/decoder/x86/ih264d_function_selector_ssse3.c
index 1786213..1786213 100755..100644
--- a/decoder/x86/ih264d_function_selector_ssse3.c
+++ b/decoder/x86/ih264d_function_selector_ssse3.c
diff --git a/encoder.arm.mk b/encoder.arm.mk
index 874c81c..874c81c 100755..100644
--- a/encoder.arm.mk
+++ b/encoder.arm.mk
diff --git a/encoder.arm64.mk b/encoder.arm64.mk
index 99dbb06..99dbb06 100755..100644
--- a/encoder.arm64.mk
+++ b/encoder.arm64.mk
diff --git a/encoder.mips.mk b/encoder.mips.mk
index 92ae5de..92ae5de 100755..100644
--- a/encoder.mips.mk
+++ b/encoder.mips.mk
diff --git a/encoder.mips64.mk b/encoder.mips64.mk
index 5181fd9..5181fd9 100755..100644
--- a/encoder.mips64.mk
+++ b/encoder.mips64.mk
diff --git a/encoder.mk b/encoder.mk
index 5829118..5829118 100755..100644
--- a/encoder.mk
+++ b/encoder.mk
diff --git a/encoder.x86.mk b/encoder.x86.mk
index e9b6a5f..e9b6a5f 100755..100644
--- a/encoder.x86.mk
+++ b/encoder.x86.mk
diff --git a/encoder.x86_64.mk b/encoder.x86_64.mk
index deb004b..deb004b 100755..100644
--- a/encoder.x86_64.mk
+++ b/encoder.x86_64.mk
diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
index fe0ce17..fe0ce17 100755..100644
--- a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
index 568e623..568e623 100755..100644
--- a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
index e4dfca8..e4dfca8 100755..100644
--- a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
index 2c04141..2c04141 100755..100644
--- a/encoder/arm/ih264e_fmt_conv.s
+++ b/encoder/arm/ih264e_fmt_conv.s
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
index e4f67a0..e4f67a0 100755..100644
--- a/encoder/arm/ih264e_function_selector.c
+++ b/encoder/arm/ih264e_function_selector.c
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
index 8b2879b..8b2879b 100755..100644
--- a/encoder/arm/ih264e_function_selector_a9q.c
+++ b/encoder/arm/ih264e_function_selector_a9q.c
diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c
index 173c2d5..173c2d5 100755..100644
--- a/encoder/arm/ih264e_function_selector_av8.c
+++ b/encoder/arm/ih264e_function_selector_av8.c
diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s
index 1b9a87a..1b9a87a 100755..100644
--- a/encoder/arm/ih264e_half_pel.s
+++ b/encoder/arm/ih264e_half_pel.s
diff --git a/encoder/arm/ih264e_platform_macros.h b/encoder/arm/ih264e_platform_macros.h
index 39cac96..39cac96 100755..100644
--- a/encoder/arm/ih264e_platform_macros.h
+++ b/encoder/arm/ih264e_platform_macros.h
diff --git a/encoder/arm/ime_distortion_metrics_a9q.s b/encoder/arm/ime_distortion_metrics_a9q.s
index b58911e..27fbe3d 100755..100644
--- a/encoder/arm/ime_distortion_metrics_a9q.s
+++ b/encoder/arm/ime_distortion_metrics_a9q.s
@@ -17,9 +17,9 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
+@**
-@/**
+@**
@******************************************************************************
@*
@*
@@ -48,7 +48,7 @@
@
-@/**
+@**
@******************************************************************************
@*
@* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
@@ -79,59 +79,62 @@
@* @remarks
@*
@******************************************************************************
-@*/
+@*
.text
.p2align 2
+
.global ime_compute_sad_16x16_fast_a9q
+
ime_compute_sad_16x16_fast_a9q:
- stmfd sp!, {r12, lr}
- lsl r2, r2, #1
- lsl r3, r3, #1
+ stmfd sp!, {r12, lr} @ 8 bytes pushed, so the stack arguments start at [sp, #8]
+ vpush {d8-d15} @ preserve d8-d15 (callee-saved under AAPCS); d8-d11 are overwritten by the loads below
+ lsl r2, r2, #1 @ double r2, the post-increment applied to r0 after every load
+ lsl r3, r3, #1 @ double r3, the post-increment applied to r1 after every load
@for bringing buffer2 into cache..., dummy load instructions
- @ LDR r12,[r1]
+ @LDR r12,[r1]
- vld1.8 {d4, d5}, [r0], r2
- vld1.8 {d6, d7}, [r1], r3
- mov r12, #6
- vld1.8 {d8, d9}, [r0], r2
- vabdl.u8 q0, d6, d4
- vabdl.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
+ vld1.8 {d4, d5}, [r0], r2 @ 16 bytes from r0, then r0 += r2
+ vld1.8 {d6, d7}, [r1], r3 @ 16 bytes from r1, then r1 += r3
+ mov r12, #6 @ loop counter; the loop subtracts 2 per pass, so it runs 3 times
+ vld1.8 {d8, d9}, [r0], r2
+ vabdl.u8 q0, d6, d4 @ q0 = widened |d6 - d4|, starts the low-half accumulator
+ vabdl.u8 q1, d7, d5 @ q1 = widened |d7 - d5|, starts the high-half accumulator
+ vld1.8 {d10, d11}, [r1], r3
loop_sad_16x16_fast:
- vld1.8 {d4, d5}, [r0], r2
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
- vld1.8 {d6, d7}, [r1], r3
- subs r12, #2
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d6, d4
- vabal.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
-
- bne loop_sad_16x16_fast
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d10, d8 @ accumulate the pair loaded on the previous pass
+ vabal.u8 q1, d11, d9
+ vld1.8 {d6, d7}, [r1], r3
+ subs r12, #2 @ sets the flags tested by bne; the NEON instructions in between leave them alone
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d10, d11}, [r1], r3
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
+ bne loop_sad_16x16_fast
- vadd.i16 q0, q0, q1
- vadd.i16 d0, d1, d0
+ vabal.u8 q0, d10, d8 @ accumulate the last pair still pending from the loop
+ vabal.u8 q1, d11, d9
- ldr r12, [sp, #12]
- vpaddl.u16 d0, d0
- vpaddl.u32 d0, d0
- vshl.u32 d0, d0, #1
- vst1.32 {d0[0]}, [r12]
+ vadd.i16 q0, q0, q1 @ fold the two accumulators together
+ vadd.i16 d0, d1, d0 @ fold the upper half of q0 into d0; d8-d15 are no longer needed
+ vpop {d8-d15} @ restore before the sp-relative load so its offset matches the two-word frame
+ ldr r12, [sp, #12] @ second stack argument: the pointer the result is stored through
+ vpaddl.u16 d0, d0 @ pairwise widen-and-add: four 16-bit lanes -> two 32-bit lanes
+ vpaddl.u32 d0, d0 @ -> one 64-bit lane holding the total
+ vshl.u32 d0, d0, #1 @ double the total
+ vst1.32 {d0[0]}, [r12] @ store the low 32 bits
- ldmfd sp!, {r12, pc}
+ ldmfd sp!, {r12, pc} @ restore r12 and return
-@/**
+@**
@******************************************************************************
@*
@* @brief computes distortion (SAD) between 2 16x8 blocks
@@ -163,56 +166,57 @@ loop_sad_16x16_fast:
@* @remarks
@*
@******************************************************************************
-@*/
+@*
@
.global ime_compute_sad_16x8_a9q
+
ime_compute_sad_16x8_a9q:
- stmfd sp!, {r12, lr}
+ stmfd sp!, {r12, lr} @ 8 bytes pushed, so the stack arguments start at [sp, #8]
@for bringing buffer2 into cache..., dummy load instructions
@LDR r12,[r1]
- vld1.8 {d4, d5}, [r0], r2
- vld1.8 {d6, d7}, [r1], r3
- mov r12, #6
- vld1.8 {d8, d9}, [r0], r2
- vabdl.u8 q0, d6, d4
- vabdl.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
+ vld1.8 {d4, d5}, [r0], r2 @ 16 bytes from r0, then r0 += r2
+ vld1.8 {d6, d7}, [r1], r3 @ 16 bytes from r1, then r1 += r3
+ mov r12, #6 @ loop counter; the loop subtracts 2 per pass, so it runs 3 times
+ vpush {d8-d15} @ preserve d8-d15 (callee-saved under AAPCS) before the first write to d8 on the next line
+ vld1.8 {d8, d9}, [r0], r2
+ vabdl.u8 q0, d6, d4 @ q0 = widened |d6 - d4|, starts the low-half accumulator
+ vabdl.u8 q1, d7, d5 @ q1 = widened |d7 - d5|, starts the high-half accumulator
+ vld1.8 {d10, d11}, [r1], r3
loop_sad_16x8:
- vld1.8 {d4, d5}, [r0], r2
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
- vld1.8 {d6, d7}, [r1], r3
- subs r12, #2
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d6, d4
- vabal.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
-
- bne loop_sad_16x8
-
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d10, d8 @ accumulate the pair loaded on the previous pass
+ vabal.u8 q1, d11, d9
+ vld1.8 {d6, d7}, [r1], r3
+ subs r12, #2 @ sets the flags tested by bne; the NEON instructions in between leave them alone
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d10, d11}, [r1], r3
- vadd.i16 q0, q0, q1
- vadd.i16 d0, d1, d0
+ bne loop_sad_16x8
- ldr r12, [sp, #12]
- vpaddl.u16 d0, d0
- vpaddl.u32 d0, d0
- vst1.32 {d0[0]}, [r12]
+ vabal.u8 q0, d10, d8 @ accumulate the last pair still pending from the loop
+ vabal.u8 q1, d11, d9
- ldmfd sp!, {r12, pc}
+ vadd.i16 q0, q0, q1 @ fold the two accumulators together
+ vadd.i16 d0, d1, d0 @ fold the upper half of q0 into d0; d8-d15 are no longer needed
+ vpop {d8-d15} @ restore before the sp-relative load so its offset matches the two-word frame
+ ldr r12, [sp, #12] @ second stack argument: the pointer the result is stored through
+ vpaddl.u16 d0, d0 @ pairwise widen-and-add: four 16-bit lanes -> two 32-bit lanes
+ vpaddl.u32 d0, d0 @ -> one 64-bit lane holding the total
+ vst1.32 {d0[0]}, [r12] @ store the low 32 bits
+ ldmfd sp!, {r12, pc} @ restore r12 and return
-@/**
+@**
@******************************************************************************
@*
@* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
@@ -243,100 +247,103 @@ loop_sad_16x8:
@* @remarks
@*
@******************************************************************************
-@*/
+@*
+
.global ime_compute_sad_16x16_ea8_a9q
ime_compute_sad_16x16_ea8_a9q:
- stmfd sp!, {r5-r7, lr}
- lsl r2, r2, #1
- lsl r3, r3, #1
+ stmfd sp!, {r5-r7, lr} @ 16 bytes pushed, so the stack arguments start at [sp, #16]
+ lsl r2, r2, #1 @ double r2, the post-increment applied to r0 after every load
+ lsl r3, r3, #1 @ double r3, the post-increment applied to r1 after every load
@for bringing buffer2 into cache..., dummy load instructions
@LDR r12,[r1]
- vld1.8 {d4, d5}, [r0], r2
- vld1.8 {d6, d7}, [r1], r3
- mov r5, #6
- vld1.8 {d8, d9}, [r0], r2
- vabdl.u8 q0, d6, d4
- vabdl.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
- ldrd r6, r7, [sp, #16]
+ vld1.8 {d4, d5}, [r0], r2 @ 16 bytes from r0, then r0 += r2
+ vld1.8 {d6, d7}, [r1], r3 @ 16 bytes from r1, then r1 += r3
+ mov r5, #6 @ loop counter; the loop subtracts 2 per pass, so it runs 3 times
+ ldrd r6, r7, [sp, #16] @ fetch both stack arguments now, before vpush lowers sp by 64 bytes
+ vpush {d8-d15} @ preserve d8-d15 (callee-saved under AAPCS); d8-d13 are written below
+ vld1.8 {d8, d9}, [r0], r2
+ vabdl.u8 q0, d6, d4 @ q0 = widened |d6 - d4|, starts the low-half accumulator
+ vabdl.u8 q1, d7, d5 @ q1 = widened |d7 - d5|, starts the high-half accumulator
+ vld1.8 {d10, d11}, [r1], r3
+
@r6 = i4_max_sad, r7 = pi4_mb_distortion
loop_sad_16x16_ea8_1:
- vld1.8 {d4, d5}, [r0], r2
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
- vld1.8 {d6, d7}, [r1], r3
- subs r5, #2
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d6, d4
- vabal.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
-
- bne loop_sad_16x16_ea8_1
-
- vabal.u8 q0, d10, d8
- sub r0, r0, r2, lsl #3
- vabal.u8 q1, d11, d9
- sub r1, r1, r3, lsl #3
-
- vadd.i16 q6, q0, q1
- add r0, r0, r2, asr #1
- vadd.i16 d12, d12, d13
- add r1, r1, r3, asr #1
-
- vpaddl.u16 d12, d12
- vld1.8 {d4, d5}, [r0], r2
- vld1.8 {d6, d7}, [r1], r3
- vpaddl.u32 d12, d12
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d6, d4
- vabal.u8 q1, d7, d5
-
- vst1.32 {d12[0]}, [r7]
- ldr r5, [r7]
- cmp r5, r6
- bgt end_func_16x16_ea8
-
- vld1.8 {d10, d11}, [r1], r3
- mov r5, #6
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d10, d8 @ accumulate the pair loaded on the previous pass
+ vabal.u8 q1, d11, d9
+ vld1.8 {d6, d7}, [r1], r3
+ subs r5, #2 @ sets the flags tested by bne; the NEON instructions in between leave them alone
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d10, d11}, [r1], r3
+
+ bne loop_sad_16x16_ea8_1
+
+ vabal.u8 q0, d10, d8 @ accumulate the last pair still pending from the first loop
+ sub r0, r0, r2, lsl #3 @ rewind r0 by 8 * r2, undoing the eight post-increments so far
+ vabal.u8 q1, d11, d9
+ sub r1, r1, r3, lsl #3 @ rewind r1 by 8 * r3 likewise
+
+ vadd.i16 q6, q0, q1 @ snapshot of the running sum in q6; q0/q1 keep accumulating below
+ add r0, r0, r2, asr #1 @ step forward by half of the doubled r2
+ vadd.i16 d12, d12, d13
+ add r1, r1, r3, asr #1 @ step forward by half of the doubled r3
+
+ vpaddl.u16 d12, d12
+ vld1.8 {d4, d5}, [r0], r2 @ start the second pass from the new r0/r1 positions
+ vld1.8 {d6, d7}, [r1], r3
+ vpaddl.u32 d12, d12 @ d12 now holds the first-pass total
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+
+ vst1.32 {d12[0]}, [r7] @ write the first-pass total through r7
+ ldr r5, [r7] @ read it back into a core register for the compare
+ cmp r5, r6
+ bgt end_func_16x16_ea8 @ signed compare: leave early when the first-pass total exceeds r6
+
+ vld1.8 {d10, d11}, [r1], r3
+ mov r5, #6 @ reload the loop counter for the second loop
loop_sad_16x16_ea8_2:
- vld1.8 {d4, d5}, [r0], r2
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
- vld1.8 {d6, d7}, [r1], r3
- subs r5, #2
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d6, d4
- vabal.u8 q1, d7, d5
- vld1.8 {d10, d11}, [r1], r3
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d10, d8
+ vabal.u8 q1, d11, d9
+ vld1.8 {d6, d7}, [r1], r3
+ subs r5, #2
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d10, d11}, [r1], r3
- bne loop_sad_16x16_ea8_2
+ bne loop_sad_16x16_ea8_2
- vabal.u8 q0, d10, d8
- vabal.u8 q1, d11, d9
+ vabal.u8 q0, d10, d8 @ accumulate the last pair still pending from the second loop
+ vabal.u8 q1, d11, d9
- vadd.i16 q0, q0, q1
- vadd.i16 d0, d1, d0
+ vadd.i16 q0, q0, q1 @ fold the two accumulators together
+ vadd.i16 d0, d1, d0
- vpaddl.u16 d0, d0
- vpaddl.u32 d0, d0
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0 @ d0 now holds the total over both passes
- vst1.32 {d0[0]}, [r7]
+ vst1.32 {d0[0]}, [r7] @ overwrite the first-pass total with the full total
end_func_16x16_ea8:
-
- ldmfd sp!, {r5-r7, pc}
+ vpop {d8-d15} @ on the shared exit label, so the early-exit path restores d8-d15 too
+ ldmfd sp!, {r5-r7, pc} @ restore r5-r7 and return
-@/*
+@*
@//---------------------------------------------------------------------------
@// Function Name : Calculate_Mad2_prog()
@//
@@ -346,7 +353,7 @@ end_func_16x16_ea8:
@// Platform : CortexA8/NEON .
@//
@//-----------------------------------------------------------------------------
-@*/
+@*
.global ime_calculate_sad2_prog_a9q
@@ -358,72 +365,72 @@ ime_calculate_sad2_prog_a9q:
@ r3 = RefBufferWidth <UWORD32>
@ stack = CurBufferWidth <UWORD32>, psad <UWORD32 *>
- stmfd sp!, {r4-r5, lr}
-
- ldr r4, [sp, #8] @ load src stride to r4
- mov r5, #14
+ stmfd sp!, {r4-r5, lr} @ 12 bytes pushed: r4 at [sp], r5 at [sp, #4], lr at [sp, #8]
+ ldr r4, [sp, #12] @ load src stride to r4: first stack argument, just above the three saved words ([sp, #8] is the saved lr)
+ mov r5, #14 @ loop counter; the loop subtracts 2 per pass, so it runs 7 times
+ vpush {d8-d15} @ preserve d8-d15 (callee-saved under AAPCS); lowers sp by a further 64 bytes
@Row 1
- vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
- vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
- vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
+ vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
+ vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
@Row 2
- vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
- vabdl.u8 q6, d2, d0
- vabdl.u8 q7, d3, d1
- vld1.8 {d8, d9}, [r0], r3 @ load ref1 Row 2
- vabdl.u8 q8, d4, d0
- vabdl.u8 q9, d5, d1
- vld1.8 {d10, d11}, [r1], r3 @ load ref2 Row 2
+ vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
+ vabdl.u8 q6, d2, d0 @ q6/q7 start the ref1 accumulators
+ vabdl.u8 q7, d3, d1
+ vld1.8 {d8, d9}, [r0], r3 @ load ref1 Row 2
+ vabdl.u8 q8, d4, d0 @ q8/q9 start the ref2 accumulators
+ vabdl.u8 q9, d5, d1
+ vld1.8 {d10, d11}, [r1], r3 @ load ref2 Row 2
loop_sad2_prog:
- subs r5, #2
+ subs r5, #2 @ sets the flags tested by the bne at the bottom; the NEON instructions in between leave them alone
@Row 1
- vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
- vabal.u8 q6, d8, d6
- vabal.u8 q7, d9, d7
- vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
- vabal.u8 q8, d10, d6
- vabal.u8 q9, d11, d7
- vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
+ vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
+ vabal.u8 q6, d8, d6
+ vabal.u8 q7, d9, d7
+ vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
+ vabal.u8 q8, d10, d6
+ vabal.u8 q9, d11, d7
+ vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
@Row 2
- vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
- vabal.u8 q6, d2, d0
- vabal.u8 q7, d3, d1
- vld1.8 {d8, d9}, [r0], r3 @ load ref1 Row 2
- vabal.u8 q8, d4, d0
- vabal.u8 q9, d5, d1
- vld1.8 {d10, d11}, [r1], r3 @ load ref2 Row 2
+ vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
+ vabal.u8 q6, d2, d0
+ vabal.u8 q7, d3, d1
+ vld1.8 {d8, d9}, [r0], r3 @ load ref1 Row 2
+ vabal.u8 q8, d4, d0
+ vabal.u8 q9, d5, d1
+ vld1.8 {d10, d11}, [r1], r3 @ load ref2 Row 2
- bne loop_sad2_prog
+ bne loop_sad2_prog
- vabal.u8 q6, d8, d6
- vabal.u8 q7, d9, d7
- vabal.u8 q8, d10, d6
- vabal.u8 q9, d11, d7
+ vabal.u8 q6, d8, d6 @ accumulate the Row 2 data still pending from the last pass
+ vabal.u8 q7, d9, d7
+ vabal.u8 q8, d10, d6
+ vabal.u8 q9, d11, d7
@ Compute SAD
- vadd.u16 q6, q6, q7 @ Q6 : sad_ref1
- vadd.u16 q8, q8, q9 @ Q8 : sad_ref2
+ vadd.u16 q6, q6, q7 @ Q6 : sad_ref1
+ vadd.u16 q8, q8, q9 @ Q8 : sad_ref2
- vadd.u16 d12, d12, d13
- ldr r5, [sp, #16] @ loading pi4_sad to r5
- vadd.u16 d16, d16, d17
+ vadd.u16 d12, d12, d13
+ ldr r5, [sp, #80] @ loading pi4_sad to r5: second stack argument at 12 + 4, plus the 64 bytes vpush still holds on the stack
+ vadd.u16 d16, d16, d17
- vpadd.u16 d12, d12, d16
- vpaddl.u16 d12, d12
+ vpadd.u16 d12, d12, d16 @ d12 = pairwise sums of ref1 (low half) and ref2 (high half)
+ vpaddl.u16 d12, d12 @ -> two 32-bit totals: sad_ref1, sad_ref2
- vst1.64 {d12}, [r5]!
+ vst1.64 {d12}, [r5]! @ store both totals; d12 is inside d8-d15, so this must come before vpop
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r5, pc}
- ldmfd sp!, {r4-r5, pc}
-
-@/*
+@*
@//---------------------------------------------------------------------------
@// Function Name : Calculate_Mad3_prog()
@//
@@ -433,7 +440,7 @@ loop_sad2_prog:
@// Platform : CortexA8/NEON .
@//
@//-----------------------------------------------------------------------------
-@*/
+@*
.global ime_calculate_sad3_prog_a9q
@@ -446,90 +453,90 @@ ime_calculate_sad3_prog_a9q:
@ stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
- stmfd sp!, {r4-r6, lr}
-
- ldrd r4, r5, [sp, #16] @ load ref stride to r4, src stride to r5
- mov r6, #14
-
- @ Row 1
- vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
- vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
- vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
- vabdl.u8 q8, d2, d0
- vabdl.u8 q9, d3, d1
- vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
- vabdl.u8 q10, d4, d0
- vabdl.u8 q11, d5, d1
-
- @ Row 2
- vld1.8 {d8, d9}, [r3], r5 @ load src Row 1
- vabdl.u8 q12, d6, d0
- vabdl.u8 q13, d7, d1
- vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 1
- vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 1
- vabal.u8 q8, d10, d8
- vabal.u8 q9, d11, d9
- vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 1
- vabal.u8 q10, d12, d8
- vabal.u8 q11, d13, d9
+ stmfd sp!, {r4-r6, lr}
+
+ ldrd r4, r5, [sp, #16] @ load ref stride to r4, src stride to r5
+ mov r6, #14
+ vpush {d8-d15}
+ @Row 1
+ vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
+ vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
+ vabdl.u8 q8, d2, d0
+ vabdl.u8 q9, d3, d1
+ vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
+ vabdl.u8 q10, d4, d0
+ vabdl.u8 q11, d5, d1
+
+ @Row 2
+ vld1.8 {d8, d9}, [r3], r5 @ load src Row 1
+ vabdl.u8 q12, d6, d0
+ vabdl.u8 q13, d7, d1
+ vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 1
+ vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 1
+ vabal.u8 q8, d10, d8
+ vabal.u8 q9, d11, d9
+ vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 1
+ vabal.u8 q10, d12, d8
+ vabal.u8 q11, d13, d9
loop_sad3_prog:
@Row 1
- vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
- vabal.u8 q12, d14, d8
- vabal.u8 q13, d15, d9
- vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
- vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
- vabal.u8 q8, d2, d0
- vabal.u8 q9, d3, d1
- vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
- vabal.u8 q10, d4, d0
- vabal.u8 q11, d5, d1
+ vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
+ vabal.u8 q12, d14, d8
+ vabal.u8 q13, d15, d9
+ vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
+ vabal.u8 q8, d2, d0
+ vabal.u8 q9, d3, d1
+ vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
+ vabal.u8 q10, d4, d0
+ vabal.u8 q11, d5, d1
@Row 2
- vld1.8 {d8, d9}, [r3], r5 @ load src Row 1
- vabal.u8 q12, d6, d0
- vabal.u8 q13, d7, d1
- vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 1
- subs r6, #2
- vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 1
- vabal.u8 q8, d10, d8
- vabal.u8 q9, d11, d9
- vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 1
- vabal.u8 q10, d12, d8
- vabal.u8 q11, d13, d9
-
- bne loop_sad3_prog
-
- vabal.u8 q12, d14, d8
- vabal.u8 q13, d15, d9
+ vld1.8 {d8, d9}, [r3], r5 @ load src Row 1
+ vabal.u8 q12, d6, d0
+ vabal.u8 q13, d7, d1
+ vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 1
+ subs r6, #2
+ vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 1
+ vabal.u8 q8, d10, d8
+ vabal.u8 q9, d11, d9
+ vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 1
+ vabal.u8 q10, d12, d8
+ vabal.u8 q11, d13, d9
+
+ bne loop_sad3_prog
+
+ vabal.u8 q12, d14, d8
+ vabal.u8 q13, d15, d9
@ Compute SAD
- vadd.u16 q8, q8, q9 @ Q8 : sad_ref1
- vadd.u16 q10, q10, q11 @ Q10 : sad_ref2
- vadd.u16 q12, q12, q13 @ Q12 : sad_ref3
+ vadd.u16 q8, q8, q9 @ Q8 : sad_ref1
+ vadd.u16 q10, q10, q11 @ Q10 : sad_ref2
+ vadd.u16 q12, q12, q13 @ Q12 : sad_ref3
- vadd.u16 d16, d16, d17
- vadd.u16 d20, d20, d21
- vadd.u16 d24, d24, d25
+ vadd.u16 d16, d16, d17
+ vadd.u16 d20, d20, d21
+ vadd.u16 d24, d24, d25
- vpadd.u16 d16, d16, d20
- vpadd.u16 d24, d24, d24
+ vpadd.u16 d16, d16, d20
+ vpadd.u16 d24, d24, d24
- ldr r6, [sp, #24] @ loading pi4_sad to r6
- vpaddl.u16 d16, d16
- vpaddl.u16 d24, d24
+ ldr r6, [sp, #88] @ loading pi4_sad to r6 (24 + 64: sp is still lowered by vpush {d8-d15})
+ vpaddl.u16 d16, d16
+ vpaddl.u16 d24, d24
- vst1.64 {d16}, [r6]!
- vst1.32 {d24[0]}, [r6]
+ vst1.64 {d16}, [r6]!
+ vst1.32 {d24[0]}, [r6]
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r6, pc}
- ldmfd sp!, {r4-r6, pc}
-
-@/**
+@**
@******************************************************************************
@*
@* @brief computes distortion (SAD) for sub-pel motion estimation
@@ -551,7 +558,7 @@ loop_sad3_prog:
@* @remarks
@*
@******************************************************************************
-@*/
+@*
.text
.p2align 2
@@ -560,115 +567,116 @@ loop_sad3_prog:
ime_sub_pel_compute_sad_16x16_a9q:
- stmfd sp!, {r4-r11, lr} @store register values to stack
+ stmfd sp!, {r4-r11, lr} @store register values to stack
- ldr r9, [sp, #36]
- ldr r10, [sp, #40]
+ ldr r9, [sp, #36]
+ ldr r10, [sp, #40]
+ vpush {d8-d15}
+ sub r4, r1, #1 @ x left
+ sub r5, r2, r10 @ y top
- sub r4, r1, #1 @ x left
- sub r5, r2, r10 @ y top
+ sub r6, r3, #1 @ xy left
+ sub r7, r3, r10 @ xy top
- sub r6, r3, #1 @ xy left
- sub r7, r3, r10 @ xy top
-
- sub r8, r7, #1 @ xy top-left
- mov r11, #15
+ sub r8, r7, #1 @ xy top-left
+ mov r11, #15
@for bringing buffer2 into cache..., dummy load instructions
@ LDR r12,[r1]
@ LDR r12,[sp,#12]
- vld1.8 {d0, d1}, [r0], r9 @ src
- vld1.8 {d2, d3}, [r5], r10 @ y top LOAD
- vld1.8 {d4, d5}, [r7], r10 @ xy top LOAD
- vld1.8 {d6, d7}, [r8], r10 @ xy top-left LOAD
-
- vabdl.u8 q6, d2, d0 @ y top ABS1
- vabdl.u8 q7, d4, d0 @ xy top ABS1
- vld1.8 {d8, d9}, [r1], r10 @ x LOAD
- vabdl.u8 q8, d6, d0 @ xy top-left ABS1
- vabdl.u8 q9, d8, d0 @ x ABS1
- vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
-
- vabal.u8 q6, d3, d1 @ y top ABS2
- vabal.u8 q7, d5, d1 @ xy top ABS2
- vld1.8 {d2, d3}, [r2], r10 @ y LOAD
- vabal.u8 q8, d7, d1 @ xy top-left ABS2
- vabal.u8 q9, d9, d1 @ x ABS2
- vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
-
- vabdl.u8 q10, d10, d0 @ x left ABS1
- vabdl.u8 q11, d2, d0 @ y ABS1
- vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
- vabdl.u8 q12, d4, d0 @ xy ABS1
- vabdl.u8 q13, d6, d0 @ xy left ABS1
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vld1.8 {d2, d3}, [r5], r10 @ y top LOAD
+ vld1.8 {d4, d5}, [r7], r10 @ xy top LOAD
+ vld1.8 {d6, d7}, [r8], r10 @ xy top-left LOAD
+
+ vabdl.u8 q6, d2, d0 @ y top ABS1
+ vabdl.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabdl.u8 q8, d6, d0 @ xy top-left ABS1
+ vabdl.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabdl.u8 q10, d10, d0 @ x left ABS1
+ vabdl.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabdl.u8 q12, d4, d0 @ xy ABS1
+ vabdl.u8 q13, d6, d0 @ xy left ABS1
loop_sub_pel_16x16:
- vabal.u8 q10, d11, d1 @ x left ABS2
- vabal.u8 q11, d3, d1 @ y ABS2
- subs r11, #1
- vabal.u8 q12, d5, d1 @ xy ABS2
- vabal.u8 q13, d7, d1 @ xy left ABS2
-
- vld1.8 {d0, d1}, [r0], r9 @ src
- vabal.u8 q6, d2, d0 @ y top ABS1
- vabal.u8 q7, d4, d0 @ xy top ABS1
- vld1.8 {d8, d9}, [r1], r10 @ x LOAD
- vabal.u8 q8, d6, d0 @ xy top-left ABS1
- vabal.u8 q9, d8, d0 @ x ABS1
- vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
-
- vabal.u8 q6, d3, d1 @ y top ABS2
- vabal.u8 q7, d5, d1 @ xy top ABS2
- vld1.8 {d2, d3}, [r2], r10 @ y LOAD
- vabal.u8 q8, d7, d1 @ xy top-left ABS2
- vabal.u8 q9, d9, d1 @ x ABS2
- vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
-
- vabal.u8 q10, d10, d0 @ x left ABS1
- vabal.u8 q11, d2, d0 @ y ABS1
- vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
- vabal.u8 q12, d4, d0 @ xy ABS1
- vabal.u8 q13, d6, d0 @ xy left ABS1
-
- bne loop_sub_pel_16x16
-
- vabal.u8 q10, d11, d1 @ x left ABS2
- vabal.u8 q11, d3, d1 @ y ABS2
- vabal.u8 q12, d5, d1 @ xy ABS2
- vabal.u8 q13, d7, d1 @ xy left ABS2
-
- vadd.i16 d0, d18, d19 @ x
- vadd.i16 d3, d12, d13 @ y top
- vadd.i16 d6, d14, d15 @ xy top
- vadd.i16 d5, d26, d27 @ xy left
- vadd.i16 d1, d20, d21 @ x left
- vadd.i16 d2, d22, d23 @ y
- vadd.i16 d4, d24, d25 @ xy
- vadd.i16 d7, d16, d17 @ xy top left
-
- vpadd.i16 d0, d0, d1
- vpadd.i16 d2, d2, d3
- vpadd.i16 d4, d4, d5
- vpadd.i16 d6, d6, d7
-
- vpaddl.u16 d0, d0
- vpaddl.u16 d2, d2
- ldr r11, [sp, #44]
- vpaddl.u16 d4, d4
- vpaddl.u16 d6, d6
-
- vst1.32 {d0}, [r11]!
- vst1.32 {d2}, [r11]!
- vst1.32 {d4}, [r11]!
- vst1.32 {d6}, [r11]!
-
- ldmfd sp!, {r4-r11, pc} @Restoring registers from stack
-
-
-
-@/**
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ subs r11, #1
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vabal.u8 q6, d2, d0 @ y top ABS1
+ vabal.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabal.u8 q8, d6, d0 @ xy top-left ABS1
+ vabal.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabal.u8 q10, d10, d0 @ x left ABS1
+ vabal.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabal.u8 q12, d4, d0 @ xy ABS1
+ vabal.u8 q13, d6, d0 @ xy left ABS1
+
+ bne loop_sub_pel_16x16
+
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ vadd.i16 d0, d18, d19 @ x
+ vadd.i16 d3, d12, d13 @ y top
+ vadd.i16 d6, d14, d15 @ xy top
+ vadd.i16 d5, d26, d27 @ xy left
+ vadd.i16 d1, d20, d21 @ x left
+ vadd.i16 d2, d22, d23 @ y
+ vadd.i16 d4, d24, d25 @ xy
+ vadd.i16 d7, d16, d17 @ xy top left
+
+ vpadd.i16 d0, d0, d1
+ vpadd.i16 d2, d2, d3
+ vpadd.i16 d4, d4, d5
+ vpadd.i16 d6, d6, d7
+
+ vpaddl.u16 d0, d0
+ vpaddl.u16 d2, d2
+ vpop {d8-d15}
+ ldr r11, [sp, #44]
+ vpaddl.u16 d4, d4
+ vpaddl.u16 d6, d6
+
+ vst1.32 {d0}, [r11]!
+ vst1.32 {d2}, [r11]!
+ vst1.32 {d4}, [r11]!
+ vst1.32 {d6}, [r11]!
+
+ ldmfd sp!, {r4-r11, pc} @Restoring registers from stack
+
+
+
+@**
@******************************************************************************
@*
@* @brief computes distortion (SAD) between 2 16x16 blocks
@@ -699,7 +707,7 @@ loop_sub_pel_16x16:
@* @remarks
@*
@******************************************************************************
-@*/
+@*
.text
.p2align 2
@@ -710,51 +718,52 @@ ime_compute_sad_16x16_a9q:
@STMFD sp!,{r12,lr}
- stmfd sp!, {r12, r14} @store register values to stack
+ stmfd sp!, {r12, r14} @store register values to stack
@for bringing buffer2 into cache..., dummy load instructions
@ LDR r12,[r1]
@ LDR r12,[sp,#12]
- vld1.8 {d4, d5}, [r0], r2
- vld1.8 {d6, d7}, [r1], r3
-
- mov r12, #14
- vld1.8 {d8, d9}, [r0], r2
- vabdl.u8 q0, d4, d6
- vld1.8 {d10, d11}, [r1], r3
- vabdl.u8 q1, d5, d7
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+ vpush {d8-d15}
+ mov r12, #14
+ vld1.8 {d8, d9}, [r0], r2
+ vabdl.u8 q0, d4, d6
+ vld1.8 {d10, d11}, [r1], r3
+ vabdl.u8 q1, d5, d7
loop_sad_16x16:
- vld1.8 {d4, d5}, [r0], r2
- vabal.u8 q0, d8, d10
- vld1.8 {d6, d7}, [r1], r3
- vabal.u8 q1, d9, d11
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d8, d10
+ vld1.8 {d6, d7}, [r1], r3
+ vabal.u8 q1, d9, d11
- vld1.8 {d8, d9}, [r0], r2
- vabal.u8 q0, d4, d6
- subs r12, #2
- vld1.8 {d10, d11}, [r1], r3
- vabal.u8 q1, d5, d7
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d4, d6
+ subs r12, #2
+ vld1.8 {d10, d11}, [r1], r3
+ vabal.u8 q1, d5, d7
- bne loop_sad_16x16
+ bne loop_sad_16x16
- vabal.u8 q0, d8, d10
- vabal.u8 q1, d9, d11
+ vabal.u8 q0, d8, d10
+ vabal.u8 q1, d9, d11
- vadd.i16 q0, q0, q1
- vadd.i16 d0, d1, d0
- ldr r12, [sp, #12]
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+ vpop {d8-d15}
+ ldr r12, [sp, #12]
- vpaddl.u16 d0, d0
- vpaddl.u32 d0, d0
- vst1.32 {d0[0]}, [r12]
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vst1.32 {d0[0]}, [r12]
- ldmfd sp!, {r12, pc} @Restoring registers from stack
+ ldmfd sp!, {r12, pc} @Restoring registers from stack
-@/*
+@*
@//---------------------------------------------------------------------------
@// Function Name : Calculate_Mad4_prog()
@//
@@ -764,7 +773,7 @@ loop_sad_16x16:
@// Platform : CortexA8/NEON .
@//
@//-----------------------------------------------------------------------------
-@*/
+@*
.global ime_calculate_sad4_prog_a9q
@@ -775,20 +784,20 @@ ime_calculate_sad4_prog_a9q:
@ r3 = CurBufferWidth <UWORD32>
@ stack = psad <UWORD32 *> {at 0x34}
- stmfd sp!, {r4-r7, lr}
+ stmfd sp!, {r4-r7, lr}
@UWORD8 *left_ptr = temp_frame - 1;
@UWORD8 *right_ptr = temp_frame + 1;
@UWORD8 *top_ptr = temp_frame - RefBufferWidth;
@UWORD8 *bot_ptr = temp_frame + RefBufferWidth;
- mov r7, #14
- sub r4, r0, #0x01 @r4 = left_ptr
- add r5, r0, #0x1 @r5 = right_ptr
- sub r6, r0, r2 @r6 = top_ptr
- add r0, r0, r2 @r0 = bot_ptr
+ mov r7, #14
+ sub r4, r0, #0x01 @r4 = left_ptr
+ add r5, r0, #0x1 @r5 = right_ptr
+ sub r6, r0, r2 @r6 = top_ptr
+ add r0, r0, r2 @r0 = bot_ptr
@r1 = buffer_ptr
-
+ vpush {d8-d15}
@D0:D1 : buffer
@D2:D3 : top
@D4:D5 : left
@@ -796,94 +805,93 @@ ime_calculate_sad4_prog_a9q:
@D8:D9 : bottom
@Row 1
- vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
- vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
- vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
- vabdl.u8 q5, d2, d0
- vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
- vabdl.u8 q6, d3, d1
+ vabdl.u8 q5, d2, d0
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabdl.u8 q6, d3, d1
- vabdl.u8 q7, d0, d4
- vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
- vabdl.u8 q8, d1, d5
+ vabdl.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabdl.u8 q8, d1, d5
@Row 2
- vabdl.u8 q9, d0, d6
- vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
- vabdl.u8 q10, d1, d7
+ vabdl.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabdl.u8 q10, d1, d7
- vabdl.u8 q11, d0, d8
- vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
- vabdl.u8 q12, d1, d9
+ vabdl.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabdl.u8 q12, d1, d9
loop_sad4_prog:
- vabal.u8 q5, d26, d2
- vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
- vabal.u8 q6, d27, d3
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
- vabal.u8 q7, d26, d4
- vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
- vabal.u8 q8, d27, d5
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
- vabal.u8 q9, d26, d6
- vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
- vabal.u8 q10, d27, d7
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
@Row 1
- vabal.u8 q11, d26, d8
- vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
- vabal.u8 q12, d27, d9
-
- vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
- subs r7, #2
- vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+ vabal.u8 q11, d26, d8
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vabal.u8 q12, d27, d9
- vabal.u8 q5, d0, d2
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ subs r7, #2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
- vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
- vabal.u8 q6, d1, d3
+ vabal.u8 q5, d0, d2
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabal.u8 q6, d1, d3
- vabal.u8 q7, d0, d4
- vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
- vabal.u8 q8, d1, d5
+ vabal.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabal.u8 q8, d1, d5
@Row 2
- vabal.u8 q9, d0, d6
- vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
- vabal.u8 q10, d1, d7
+ vabal.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabal.u8 q10, d1, d7
- vabal.u8 q11, d0, d8
- vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
- vabal.u8 q12, d1, d9
+ vabal.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabal.u8 q12, d1, d9
- bne loop_sad4_prog
+ bne loop_sad4_prog
- vabal.u8 q5, d26, d2
- vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
- vabal.u8 q6, d27, d3
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
- vabal.u8 q7, d26, d4
- vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
- vabal.u8 q8, d27, d5
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
- vabal.u8 q9, d26, d6
- vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
- vabal.u8 q10, d27, d7
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
- vabal.u8 q11, d26, d8
- vabal.u8 q12, d27, d9
+ vabal.u8 q11, d26, d8
+ vabal.u8 q12, d27, d9
@;Q5:Q6 : sad_top
@;Q7:Q8 : sad_left
@;Q9:Q10 : sad_right
@;Q11:Q12 : sad_bot
- vadd.u16 q5, q5, q6
- vadd.u16 q7, q7, q8
- vadd.u16 q9, q9, q10
- vadd.u16 q11, q11, q12
+ vadd.u16 q5, q5, q6
+ vadd.u16 q7, q7, q8
+ vadd.u16 q9, q9, q10
+ vadd.u16 q11, q11, q12
@; Free :-
@; Q6,Q8,Q10,Q12
@@ -893,10 +901,10 @@ loop_sad4_prog:
@;Q9 -> D18:D19
@;Q11 -> D22:D23
- vadd.u16 d10, d10, d11
- vadd.u16 d14, d14, d15
- vadd.u16 d18, d18, d19
- vadd.u16 d22, d22, d23
+ vadd.u16 d10, d10, d11
+ vadd.u16 d14, d14, d15
+ vadd.u16 d18, d18, d19
+ vadd.u16 d22, d22, d23
@;D10 : sad_top
@;D14 : sad_left
@@ -904,35 +912,35 @@ loop_sad4_prog:
@;D22 : sad_bot
- vpaddl.u16 d11, d10
- vpaddl.u16 d15, d14
- vpaddl.u16 d19, d18
- vpaddl.u16 d23, d22
+ vpaddl.u16 d11, d10
+ vpaddl.u16 d15, d14
+ vpaddl.u16 d19, d18
+ vpaddl.u16 d23, d22
@;D11 : sad_top
@;D15 : sad_left
@;D19 : sad_right
@;D23 : sad_bot
- vpaddl.u32 d10, d11
- vpaddl.u32 d22, d23
- vpaddl.u32 d14, d15
- vpaddl.u32 d18, d19
+ vpaddl.u32 d10, d11
+ vpaddl.u32 d22, d23
+ vpaddl.u32 d14, d15
+ vpaddl.u32 d18, d19
@;D10 : sad_top
@;D14 : sad_left
@;D18 : sad_right
@;D22 : sad_bot
- ldr r4, [sp, #20] @;Can be rearranged
-
- vsli.64 d10, d22, #32
- vsli.64 d14, d18, #32
+ ldr r4, [sp, #84] @;Can be rearranged
- vst1.64 {d14}, [r4]!
- vst1.64 {d10}, [r4]!
+ vsli.64 d10, d22, #32
+ vsli.64 d14, d18, #32
- ldmfd sp!, {r4-r7, pc}
+ vst1.64 {d14}, [r4]!
+ vst1.64 {d10}, [r4]!
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r7, pc}
@@ -974,37 +982,37 @@ ime_compute_satqd_16x16_lumainter_a9q:
@R5 :Distortion,ie SAD
@R6 :is nonzero
- push {r4-r12, lr} @push all the variables first
+ push {r4-r12, lr} @push all the variables first
@ADD SP,SP,#40 ;decrement stack pointer,to accomodate two variables
- ldr r4, [sp, #40] @load the threshold address
-
- mov r8, #8 @Number of 4x8 blocks to be processed
- mov r10, #0 @Sad
- mov r7, #0 @Nonzero info
+ ldr r4, [sp, #40] @load the threshold address
+ vpush {d8-d15}
+ mov r8, #8 @Number of 4x8 blocks to be processed
+ mov r10, #0 @Sad
+ mov r7, #0 @Nonzero info
@----------------------------------------------------
- vld1.u8 d30, [r0], r2 @I load 8 pix src row 1
+ vld1.u8 d30, [r0], r2 @I load 8 pix src row 1
- vld1.u8 d31, [r1], r3 @I load 8 pix pred row 1
+ vld1.u8 d31, [r1], r3 @I load 8 pix pred row 1
- vld1.u8 d28, [r0], r2 @I load 8 pix src row 2
+ vld1.u8 d28, [r0], r2 @I load 8 pix src row 2
- vld1.u8 d29, [r1], r3 @I load 8 pix pred row 2
+ vld1.u8 d29, [r1], r3 @I load 8 pix pred row 2
- vld1.u8 d26, [r0], r2 @I load 8 pix src row 3
- vabdl.u8 q0, d30, d31 @I Abs diff r1 blk 12
+ vld1.u8 d26, [r0], r2 @I load 8 pix src row 3
+ vabdl.u8 q0, d30, d31 @I Abs diff r1 blk 12
- vld1.u8 d27, [r1], r3 @I load 8 pix pred row 3
+ vld1.u8 d27, [r1], r3 @I load 8 pix pred row 3
- vld1.u8 d24, [r0], r2 @I load 8 pix src row 4
+ vld1.u8 d24, [r0], r2 @I load 8 pix src row 4
- vld1.u8 d25, [r1], r3 @I load 8 pix pred row 4
- vabdl.u8 q1, d28, d29 @I Abs diff r1 blk 12
+ vld1.u8 d25, [r1], r3 @I load 8 pix pred row 4
+ vabdl.u8 q1, d28, d29 @I Abs diff r1 blk 12
- vld1.u16 {q11}, [r4] @I load the threhold
- vabdl.u8 q2, d26, d27 @I Abs diff r1 blk 12
+ vld1.u16 {q11}, [r4] @I load the threhold
+ vabdl.u8 q2, d26, d27 @I Abs diff r1 blk 12
- vabdl.u8 q3, d24, d25 @I Abs diff r1 blk 12
+ vabdl.u8 q3, d24, d25 @I Abs diff r1 blk 12
@@ -1013,128 +1021,128 @@ core_loop:
@S5 S6 S7 S8 A5 A6 A7 A8
@S9 S10 S11 S12 A9 A10 A11 A12
@S13 S14 S15 S16 A13 A14 A15 A16
- ands r11, r8, #1 @II See if we are at even or odd block
- vadd.u16 q4 , q0, q3 @I Add r1 r4
- lsl r11, r2, #2 @II Move back src 4 rows
+ ands r11, r8, #1 @II See if we are at even or odd block
+ vadd.u16 q4 , q0, q3 @I Add r1 r4
+ lsl r11, r2, #2 @II Move back src 4 rows
- subeq r0, r0, r11 @II Move back src 4 rows if we are at even block
- vadd.u16 q5 , q1, q2 @I Add r2 r3
- addeq r0, r0, #8 @II Move src 8 cols forward if we are at even block
+ subeq r0, r0, r11 @II Move back src 4 rows if we are at even block
+ vadd.u16 q5 , q1, q2 @I Add r2 r3
+ addeq r0, r0, #8 @II Move src 8 cols forward if we are at even block
- lsl r11, r3, #2 @II Move back pred 4 rows
- vtrn.16 d8 , d10 @I trnspse 1
- subeq r1, r1, r11 @II Move back pred 4 rows if we are at even block
+ lsl r11, r3, #2 @II Move back pred 4 rows
+ vtrn.16 d8 , d10 @I trnspse 1
+ subeq r1, r1, r11 @II Move back pred 4 rows if we are at even block
- addeq r1, r1, #8 @II Move pred 8 cols forward if we are at even block
- vtrn.16 d9 , d11 @I trnspse 2
- subne r0, r0, #8 @II Src 8clos back for odd rows
+ addeq r1, r1, #8 @II Move pred 8 cols forward if we are at even block
+ vtrn.16 d9 , d11 @I trnspse 2
+ subne r0, r0, #8 @II Src 8clos back for odd rows
- subne r1, r1, #8 @II Pred 8 cols back for odd rows
- vtrn.32 d10, d11 @I trnspse 4
+ subne r1, r1, #8 @II Pred 8 cols back for odd rows
+ vtrn.32 d10, d11 @I trnspse 4
- vtrn.32 d8 , d9 @I trnspse 3
- vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ vtrn.32 d8 , d9 @I trnspse 3
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
@D8 S1 S4 A1 A4
@D9 S2 S3 A2 A3
@D11 S1 S4 A1 A4
@D10 S2 S3 A2 A3
- vadd.s16 q6, q4, q5 @I Get s1 s4
- vld1.u8 d30, [r0], r2 @II load first 8 pix src row 1
+ vadd.s16 q6, q4, q5 @I Get s1 s4
+ vld1.u8 d30, [r0], r2 @II load first 8 pix src row 1
- vtrn.s16 d12, d13 @I Get s2 s3
+ vtrn.s16 d12, d13 @I Get s2 s3
@D12 S1 S4 A1 A4
@D13 S2 S3 A2 A3
- vshl.s16 q7, q6 , #1 @I si = si<<1
- vld1.u8 d31, [r1], r3 @II load first 8 pix pred row 1
+ vshl.s16 q7, q6 , #1 @I si = si<<1
+ vld1.u8 d31, [r1], r3 @II load first 8 pix pred row 1
- vpadd.s16 d16, d12, d13 @I (s1 + s4) (s2 + s3)
- vld1.u8 d28, [r0], r2 @II load first 8 pix src row 2
+ vpadd.s16 d16, d12, d13 @I (s1 + s4) (s2 + s3)
+ vld1.u8 d28, [r0], r2 @II load first 8 pix src row 2
@ D16 S14 A14 S23 A23
- vrev32.16 d0, d16 @I
- vuzp.s16 d16, d0 @I
+ vrev32.16 d0, d16 @I
+ vuzp.s16 d16, d0 @I
@D16 S14 S23 A14 A23
- vadd.s16 d17, d12, d13 @I (s1 + s2) (s3 + s4)
- vld1.u8 d29, [r1], r3 @II load first 8 pix pred row 2
+ vadd.s16 d17, d12, d13 @I (s1 + s2) (s3 + s4)
+ vld1.u8 d29, [r1], r3 @II load first 8 pix pred row 2
@D17 S12 S34 A12 A34
- vrev32.16 q9, q7 @I Rearrange si's
+ vrev32.16 q9, q7 @I Rearrange si's
@Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
@D12 S1 S4 A1 A4
@D19 Z3 Z2 Y3 Y2
- vsub.s16 d8, d12, d19 @I (s1 - (s3<<1)) (s4 - (s2<<1))
- vld1.u8 d26, [r0], r2 @II load first 8 pix src row 3
+ vsub.s16 d8, d12, d19 @I (s1 - (s3<<1)) (s4 - (s2<<1))
+ vld1.u8 d26, [r0], r2 @II load first 8 pix src row 3
@D13 S2 S3 A2 A3
@D18 Z4 Z1 Y4 Y1
- vsub.s16 d9, d13, d18 @I (s2 - (s4<<1)) (s3 - (s1<<1))
- vld1.u8 d27, [r1], r3 @II load first 8 pix pred row 3
+ vsub.s16 d9, d13, d18 @I (s2 - (s4<<1)) (s3 - (s1<<1))
+ vld1.u8 d27, [r1], r3 @II load first 8 pix pred row 3
@Q10 S8 S5 A8 A5 S7 S4 A7 A4
@D16 S14 S23 A14 A23
- vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
- vld1.u8 d24, [r0], r2 @II load first 8 pix src row 4
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ vld1.u8 d24, [r0], r2 @II load first 8 pix src row 4
@D22 SAD1 SAD2 junk junk
@Q8 S2 S1 A2 A1 S6 S3 A6 A3
@Q10 S8 S5 A8 A5 S7 S4 A7 A4
- vtrn.32 q8, q4 @I Rearrange to make ls of each block togather
+ vtrn.32 q8, q4 @I Rearrange to make ls of each block togather
@Q8 S2 S1 S8 S5 S6 S3 S7 S4
@Q10 A2 A1 A8 A5 A6 A3 A7 A4
- ldrh r11, [r4, #16] @I Load the threshold for DC val blk 1
- vdup.s16 q6, d10[0] @I Get the sad blk 1
- vabdl.u8 q0, d30, d31 @II Abs diff r1 blk 12
+ ldrh r11, [r4, #16] @I Load the threshold for DC val blk 1
+ vdup.s16 q6, d10[0] @I Get the sad blk 1
+ vabdl.u8 q0, d30, d31 @II Abs diff r1 blk 12
- vshl.s16 q7, q6, #1 @I sad_2 = sad_1<<1
- vmov.s16 r9, d10[0] @I Get the sad for block 1
+ vshl.s16 q7, q6, #1 @I sad_2 = sad_1<<1
+ vmov.s16 r9, d10[0] @I Get the sad for block 1
- vsub.s16 q9, q7, q8 @I Add to the lss
- vmov.s16 r5, d10[1] @I Get the sad for block 2
+ vsub.s16 q9, q7, q8 @I Add to the lss
+ vmov.s16 r5, d10[1] @I Get the sad for block 2
- vcle.s16 q7, q11, q9 @I Add to the lss
- vld1.u8 d25, [r1], r3 @II load first 8 pix pred row 4
+ vcle.s16 q7, q11, q9 @I Add to the lss
+ vld1.u8 d25, [r1], r3 @II load first 8 pix pred row 4
- vdup.s16 q15, d10[1] @I Get the sad blk 1
- vabdl.u8 q1, d28, d29 @II Abs diff r1 blk 12
+ vdup.s16 q15, d10[1] @I Get the sad blk 1
+ vabdl.u8 q1, d28, d29 @II Abs diff r1 blk 12
- vshl.s16 q14, q15, #1 @I sad_2 = sad_1<<1
- vsub.s16 q3, q14, q4 @I Add to the lss
- vcle.s16 q15, q11, q3 @I Add to the lss
+ vshl.s16 q14, q15, #1 @I sad_2 = sad_1<<1
+ vsub.s16 q3, q14, q4 @I Add to the lss
+ vcle.s16 q15, q11, q3 @I Add to the lss
- ADD R10, R10, R9 @I Add to the global sad blk 1
- vtrn.u8 q15, q7 @I get all comparison bits to one reg
- vabdl.u8 q2, d26, d27 @II Abs diff r1 blk 12
+ ADD R10, R10, R9 @I Add to the global sad blk 1
+ vtrn.u8 q15, q7 @I get all comparison bits to one reg
+ vabdl.u8 q2, d26, d27 @II Abs diff r1 blk 12
- ADD R10, R10, R5 @I Add to the global sad blk 2
- vshr.u8 q14, q15, #7 @I Shift the bits so that no overflow occurs
- cmp r11, r9
+ ADD R10, R10, R5 @I Add to the global sad blk 2
+ vshr.u8 q14, q15, #7 @I Shift the bits so that no overflow occurs
+ cmp r11, r9
- movle r7, #0xf @I If not met mark it by mvoing non zero val to R7 blk 1 ;I Compare with threshold blk 1
- vadd.u8 d28, d28, d29 @I Add the bits
- cmp r11, r5 @I Compare with threshold blk 2
+ movle r7, #0xf @I If not met mark it by mvoing non zero val to R7 blk 1 ;I Compare with threshold blk 1
+ vadd.u8 d28, d28, d29 @I Add the bits
+ cmp r11, r5 @I Compare with threshold blk 2
- movle r7, #0xf @I If not met mark it by mvoing non zero val to R7 blk 2
- vpadd.u8 d28, d28, d29 @I Add the bits
+ movle r7, #0xf @I If not met mark it by mvoing non zero val to R7 blk 2
+ vpadd.u8 d28, d28, d29 @I Add the bits
- vmov.u32 r11, d28[0] @I Since a set bit now represents a unstatisofrd contifon store it in r11
- vabdl.u8 q3, d24, d25 @II Abs diff r1 blk 12
+ vmov.u32 r11, d28[0] @I Since a set bit now represents a unstatisofrd contifon store it in r11
+ vabdl.u8 q3, d24, d25 @II Abs diff r1 blk 12
- orr r7, r7, r11 @I get the guy to r11
+ orr r7, r7, r11 @I get the guy to r11
- sub r8, r8, #1 @I Decremrnt block count
+ sub r8, r8, #1 @I Decremrnt block count
- cmp r7, #0 @I If we have atlest one non zero block
- bne compute_sad_only @I if a non zero block is der,From now on compute sad only
+ cmp r7, #0 @I If we have atlest one non zero block
+ bne compute_sad_only @I if a non zero block is der,From now on compute sad only
- cmp r8, #1 @I See if we are at the last block
- bne core_loop @I If the blocks are zero, lets continue the satdq
+ cmp r8, #1 @I See if we are at the last block
+ bne core_loop @I If the blocks are zero, lets continue the satdq
@EPILOUGE for core loop
@@ -1142,94 +1150,94 @@ core_loop:
@S5 S6 S7 S8 A5 A6 A7 A8
@S9 S10 S11 S12 A9 A10 A11 A12
@S13 S14 S15 S16 A13 A14 A15 A16
- vadd.u16 q4 , q0, q3 @Add r1 r4
- vadd.u16 q5 , q1, q2 @Add r2 r3
+ vadd.u16 q4 , q0, q3 @Add r1 r4
+ vadd.u16 q5 , q1, q2 @Add r2 r3
@D8 S1 S2 S2 S1
@D10 S4 S3 S3 S4
@D9 A1 A2 A2 A1
@D11 A4 A3 A3 A4
- vtrn.16 d8 , d10 @I trnspse 1
- vtrn.16 d9 , d11 @I trnspse 2
- vtrn.32 d8 , d9 @I trnspse 3
- vtrn.32 d10, d11 @I trnspse 4
+ vtrn.16 d8 , d10 @I trnspse 1
+ vtrn.16 d9 , d11 @I trnspse 2
+ vtrn.32 d8 , d9 @I trnspse 3
+ vtrn.32 d10, d11 @I trnspse 4
- vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
@D8 S1 S4 A1 A4
@D9 S2 S3 A2 A3
@D11 S1 S4 A1 A4
@D10 S2 S3 A2 A3
- vadd.s16 q6, q4, q5 @Get s1 s4
- vtrn.s16 d12, d13 @Get s2 s3
+ vadd.s16 q6, q4, q5 @Get s1 s4
+ vtrn.s16 d12, d13 @Get s2 s3
@D12 S1 S4 A1 A4
@D13 S2 S3 A2 A3
- vshl.s16 q7, q6 , #1 @si = si<<1
- vmov.s16 r9, d10[0] @Get the sad for block 1
+ vshl.s16 q7, q6 , #1 @si = si<<1
+ vmov.s16 r9, d10[0] @Get the sad for block 1
- vpadd.s16 d16, d12, d13 @(s1 + s4) (s2 + s3)
- vmov.s16 r5, d10[1] @Get the sad for block 2
+ vpadd.s16 d16, d12, d13 @(s1 + s4) (s2 + s3)
+ vmov.s16 r5, d10[1] @Get the sad for block 2
@D16 S14 A14 S23 A23
- vrev32.16 d30, d16 @
- vuzp.s16 d16, d30 @
+ vrev32.16 d30, d16 @
+ vuzp.s16 d16, d30 @
@D16 S14 S23 A14 A23
- vadd.s16 d17, d12, d13 @(s1 + s2) (s3 + s4)
+ vadd.s16 d17, d12, d13 @(s1 + s2) (s3 + s4)
@D17 S12 S34 A12 A34
- vrev32.16 q9, q7 @Rearrange si's
+ vrev32.16 q9, q7 @Rearrange si's
@Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
@D12 S1 S4 A1 A4
@D19 Z3 Z2 Y3 Y2
- vsub.s16 d8, d12, d19 @(s1 - (s3<<1)) (s4 - (s2<<1))
+ vsub.s16 d8, d12, d19 @(s1 - (s3<<1)) (s4 - (s2<<1))
@D13 S2 S3 A2 A3
@D18 Z4 Z1 Y4 Y1
- vsub.s16 d9, d13, d18 @(s2 - (s4<<1)) (s3 - (s1<<1))
+ vsub.s16 d9, d13, d18 @(s2 - (s4<<1)) (s3 - (s1<<1))
@Q10 S8 S5 A8 A5 S7 S4 A7 A4
@D16 S14 S23 A14 A23
- vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
@D22 SAD1 SAD2 junk junk
- vmov.u16 r9, d10[0] @Get the sad for block 1
- vmov.u16 r5, d10[1] @Get the sad for block 2
+ vmov.u16 r9, d10[0] @Get the sad for block 1
+ vmov.u16 r5, d10[1] @Get the sad for block 2
@Q8 S2 S1 A2 A1 S6 S3 A6 A3
@Q10 S8 S5 A8 A5 S7 S4 A7 A4
- ldrh r11, [r4, #16] @Load the threshold for DC val blk 1
- vtrn.32 q8, q4 @Rearrange to make ls of each block togather
- ADD R10, R10, R9 @Add to the global sad blk 1
+ ldrh r11, [r4, #16] @Load the threshold for DC val blk 1
+ vtrn.32 q8, q4 @Rearrange to make ls of each block togather
+ ADD R10, R10, R9 @Add to the global sad blk 1
@Q8 S2 S1 S8 S5 S6 S3 S7 S4
@Q10 A2 A1 A8 A5 A6 A3 A7 A4
- vld1.u16 {q11}, [r4] @load the threhold
- ADD R10, R10, R5 @Add to the global sad blk 2
+ vld1.u16 {q11}, [r4] @load the threhold
+ ADD R10, R10, R5 @Add to the global sad blk 2
- vdup.u16 q6, d10[0] @Get the sad blk 1
+ vdup.u16 q6, d10[0] @Get the sad blk 1
- cmp r11, r9 @Compare with threshold blk 1
- vshl.u16 q7, q6, #1 @sad_2 = sad_1<<1
+ cmp r11, r9 @Compare with threshold blk 1
+ vshl.u16 q7, q6, #1 @sad_2 = sad_1<<1
- vsub.s16 q9, q7, q8 @Add to the lss
+ vsub.s16 q9, q7, q8 @Add to the lss
- vcle.s16 q15, q11, q9 @Add to the lss
- movle r7, #0xf @If not met mark it by mvoing non zero val to R7 blk 1
+ vcle.s16 q15, q11, q9 @Add to the lss
+ movle r7, #0xf @If not met mark it by mvoing non zero val to R7 blk 1
- cmp r11, r5 @Compare with threshold blk 2
- vdup.u16 q14, d10[1] @Get the sad blk 1
+ cmp r11, r5 @Compare with threshold blk 2
+ vdup.u16 q14, d10[1] @Get the sad blk 1
- vshl.u16 q13, q14, #1 @sad_2 = sad_1<<1
- vsub.s16 q12, q13, q4 @Add to the lss
- vcle.s16 q14, q11, q12 @Add to the lss
- movle r7, #0xf @If not met mark it by mvoing non zero val to R7 blk 2
+ vshl.u16 q13, q14, #1 @sad_2 = sad_1<<1
+ vsub.s16 q12, q13, q4 @Add to the lss
+ vcle.s16 q14, q11, q12 @Add to the lss
+ movle r7, #0xf @If not met mark it by mvoing non zero val to R7 blk 2
- vtrn.u8 q14, q15 @get all comparison bits to one reg
- vshr.u8 q14, q14, #7 @Shift the bits so that no overflow occurs
- vadd.u8 d28, d28, d29 @Add the bits
- vpadd.u8 d28, d28, d29 @Add the bits
- vmov.u32 r11, d28[0] @Since a set bit now represents a unstatisofrd contifon store it in r11
- orr r7, r7, r11 @get the guy to r11
+ vtrn.u8 q14, q15 @get all comparison bits to one reg
+ vshr.u8 q14, q14, #7 @Shift the bits so that no overflow occurs
+ vadd.u8 d28, d28, d29 @Add the bits
+ vpadd.u8 d28, d28, d29 @Add the bits
+ vmov.u32 r11, d28[0] @Since a set bit now represents a unstatisofrd contifon store it in r11
+ orr r7, r7, r11 @get the guy to r11
- b funcend_sad_16x16 @Since all blocks ar processed nw, got to end
+ b funcend_sad_16x16 @Since all blocks ar processed nw, got to end
compute_sad_only: @This block computes SAD only, so will be lighter
@IT will start processign at n odd block
@@ -1237,117 +1245,119 @@ compute_sad_only: @This block computes SAD only, so will b
@and then for two blocks at a time
@The counter is r7, hence r7 blocks will be processed
- and r11, r8, #1 @Get the last bit of counter
- cmp r11, #0 @See if we are at even or odd block
+ and r11, r8, #1 @Get the last bit of counter
+ cmp r11, #0 @See if we are at even or odd block
@iif the blk is even we just have to set the pointer to the
@start of current row
- lsleq r11, r2, #2 @I Move back src 4 rows
- subeq r0, r0, r11 @I Move back src 4 rows if we are at even block
+ lsleq r11, r2, #2 @I Move back src 4 rows
+ subeq r0, r0, r11 @I Move back src 4 rows if we are at even block
- lsleq r11, r3, #2 @I Move back pred 4 rows
- subeq r1, r1, r11 @I Move back pred 4 rows if we are at even block
+ lsleq r11, r3, #2 @I Move back pred 4 rows
+ subeq r1, r1, r11 @I Move back pred 4 rows if we are at even block
@ADDEQ R8,R8,#2 ;Inc counter
- beq skip_odd_blk @If the blk is odd we have to compute sad
+ beq skip_odd_blk @If the blk is odd we have to compute sad
- vadd.u16 q4, q0, q1 @Add SAD of row1 and row2
- vadd.u16 q5, q2, q3 @Add SAD of row3 and row4
- vadd.u16 q6, q4, q5 @Add SAD of row 1-4
- vadd.u16 d14, d12, d13 @Add Blk1 and blk2
- vpadd.u16 d16, d14, d15 @Add col 1-2 and 3-4
- vpadd.u16 d18, d16, d17 @Add col 12-34
+ vadd.u16 q4, q0, q1 @Add SAD of row1 and row2
+ vadd.u16 q5, q2, q3 @Add SAD of row3 and row4
+ vadd.u16 q6, q4, q5 @Add SAD of row 1-4
+ vadd.u16 d14, d12, d13 @Add Blk1 and blk2
+ vpadd.u16 d16, d14, d15 @Add col 1-2 and 3-4
+ vpadd.u16 d18, d16, d17 @Add col 12-34
- vmov.u16 r9, d18[0] @Move sad to arm
- ADD R10, R10, R9 @Add to the global sad
+ vmov.u16 r9, d18[0] @Move sad to arm
+ ADD R10, R10, R9 @Add to the global sad
- sub r8, r8, #1 @Dec counter
- cmp r8, #0 @See if we processed last block
- beq funcend_sad_16x16 @if lprocessed last block goto end of func
+ sub r8, r8, #1 @Dec counter
+ cmp r8, #0 @See if we processed last block
+ beq funcend_sad_16x16 @if lprocessed last block goto end of func
- sub r0, r0, #8 @Since we processed od block move back src by 8 cols
- sub r1, r1, #8 @Since we processed od block move back pred by 8 cols
+ sub r0, r0, #8 @Since we processed od block move back src by 8 cols
+ sub r1, r1, #8 @Since we processed od block move back pred by 8 cols
skip_odd_blk:
- vmov.s16 q0, #0 @Initialize the accumulator
- vmov.s16 q1, #0 @Initialize the accumulator
+ vmov.s16 q0, #0 @Initialize the accumulator
+ vmov.s16 q1, #0 @Initialize the accumulator
- vld1.u8 {q15}, [r0], r2 @load src r1
- vld1.u8 {q14}, [r1], r3 @load pred r1
+ vld1.u8 {q15}, [r0], r2 @load src r1
+ vld1.u8 {q14}, [r1], r3 @load pred r1
- vld1.u8 {q13}, [r0], r2 @load src r2
- vld1.u8 {q12}, [r1], r3 @load pred r2
+ vld1.u8 {q13}, [r0], r2 @load src r2
+ vld1.u8 {q12}, [r1], r3 @load pred r2
- vld1.u8 {q11}, [r0], r2 @load src r3
- vld1.u8 {q10}, [r1], r3 @load pred r2
+ vld1.u8 {q11}, [r0], r2 @load src r3
+ vld1.u8 {q10}, [r1], r3 @load pred r2
- vld1.u8 {q9}, [r0], r2 @load src r4
- vld1.u8 {q8}, [r1], r3 @load pred r4
+ vld1.u8 {q9}, [r0], r2 @load src r4
+ vld1.u8 {q8}, [r1], r3 @load pred r4
- cmp r8, #2
- beq sad_epilouge
+ cmp r8, #2
+ beq sad_epilouge
sad_loop:
- vabal.u8 q0, d30, d28 @I accumulate Abs diff R1
- vabal.u8 q1, d31, d29 @I accumulate Abs diff R1
+ vabal.u8 q0, d30, d28 @I accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @I accumulate Abs diff R1
- vld1.u8 {q15}, [r0], r2 @II load r1 src
- vabal.u8 q0, d26, d24 @I accumulate Abs diff R2
+ vld1.u8 {q15}, [r0], r2 @II load r1 src
+ vabal.u8 q0, d26, d24 @I accumulate Abs diff R2
- vld1.u8 {q14}, [r1], r3 @II load r1 pred
- vabal.u8 q1, d27, d25 @I accumulate Abs diff R2
+ vld1.u8 {q14}, [r1], r3 @II load r1 pred
+ vabal.u8 q1, d27, d25 @I accumulate Abs diff R2
- vld1.u8 {q13}, [r0], r2 @II load r3 src
- vabal.u8 q0, d22, d20 @I accumulate Abs diff R3
+ vld1.u8 {q13}, [r0], r2 @II load r3 src
+ vabal.u8 q0, d22, d20 @I accumulate Abs diff R3
- vld1.u8 {q12}, [r1], r3 @II load r2 pred
- vabal.u8 q1, d23, d21 @I accumulate Abs diff R3
+ vld1.u8 {q12}, [r1], r3 @II load r2 pred
+ vabal.u8 q1, d23, d21 @I accumulate Abs diff R3
- vld1.u8 {q11}, [r0], r2 @II load r3 src
- vabal.u8 q0, d18, d16 @I accumulate Abs diff R4
+ vld1.u8 {q11}, [r0], r2 @II load r3 src
+ vabal.u8 q0, d18, d16 @I accumulate Abs diff R4
- sub r8, r8, #2 @Since we processe 16 pix @a time, dec by 2
- vld1.u8 {q10}, [r1], r3 @II load r3 pred
- vabal.u8 q1, d19, d17 @I accumulate Abs diff R4
+ sub r8, r8, #2 @Since we processe 16 pix @a time, dec by 2
+ vld1.u8 {q10}, [r1], r3 @II load r3 pred
+ vabal.u8 q1, d19, d17 @I accumulate Abs diff R4
- cmp r8, #2 @Check if last loop
- vld1.u8 {q9}, [r0], r2 @II load r4 src
- vld1.u8 {q8}, [r1], r3 @II load r4 pred
+ cmp r8, #2 @Check if last loop
+ vld1.u8 {q9}, [r0], r2 @II load r4 src
+ vld1.u8 {q8}, [r1], r3 @II load r4 pred
- bne sad_loop @Go back to SAD computation
+ bne sad_loop @Go back to SAD computation
sad_epilouge:
- vabal.u8 q0, d30, d28 @Accumulate Abs diff R1
- vabal.u8 q1, d31, d29 @Accumulate Abs diff R1
+ vabal.u8 q0, d30, d28 @Accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @Accumulate Abs diff R1
- vabal.u8 q0, d26, d24 @Accumulate Abs diff R2
- vabal.u8 q1, d27, d25 @Accumulate Abs diff R2
+ vabal.u8 q0, d26, d24 @Accumulate Abs diff R2
+ vabal.u8 q1, d27, d25 @Accumulate Abs diff R2
- vabal.u8 q0, d22, d20 @Accumulate Abs diff R3
- vabal.u8 q1, d23, d21 @Aaccumulate Abs diff R3
+ vabal.u8 q0, d22, d20 @Accumulate Abs diff R3
+ vabal.u8 q1, d23, d21 @Aaccumulate Abs diff R3
- vabal.u8 q0, d18, d16 @Accumulate Abs diff R4
- vabal.u8 q1, d19, d17 @Accumulate Abs diff R4
+ vabal.u8 q0, d18, d16 @Accumulate Abs diff R4
+ vabal.u8 q1, d19, d17 @Accumulate Abs diff R4
- vadd.u16 q2, q0, q1 @ADD two accumulators
- vadd.u16 d6, d4, d5 @Add two blk sad
- vpadd.u16 d8, d6, d7 @Add col 1-2 and 3-4 sad
- vpadd.u16 d10, d8, d9 @Add col 12-34 sad
+ vadd.u16 q2, q0, q1 @ADD two accumulators
+ vadd.u16 d6, d4, d5 @Add two blk sad
+ vpadd.u16 d8, d6, d7 @Add col 1-2 and 3-4 sad
+ vpadd.u16 d10, d8, d9 @Add col 12-34 sad
- vmov.u16 r9, d10[0] @move SAD to ARM
- ADD R10, R10, R9 @Add to the global sad
+ vmov.u16 r9, d10[0] @move SAD to ARM
+ ADD R10, R10, R9 @Add to the global sad
funcend_sad_16x16: @End of fucntion process
- ldr r5, [sp, #44]
- ldr r6, [sp, #48]
- str r7, [r6] @Store the is zero reg
- str r10, [r5] @Store sad
+ vpop {d8-d15}
+ ldr r5, [sp, #44]
+ ldr r6, [sp, #48]
+
+ str r7, [r6] @Store the is zero reg
+ str r10, [r5] @Store sad
@SUB SP,SP,#40
- pop {r4-r12, pc}
+ pop {r4-r12, pc}
diff --git a/encoder/arm/ime_platform_macros.h b/encoder/arm/ime_platform_macros.h
index 0f5b2f2..0f5b2f2 100755..100644
--- a/encoder/arm/ime_platform_macros.h
+++ b/encoder/arm/ime_platform_macros.h
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
index c442077..e768c21 100755..100644
--- a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -76,7 +76,7 @@
.p2align 2
.include "ih264_neon_macros.s"
-.globl ih264e_evaluate_intra16x16_modes_av8
+.global ih264e_evaluate_intra16x16_modes_av8
ih264e_evaluate_intra16x16_modes_av8:
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
index b02afd1..b02afd1 100755..100644
--- a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
index 6dbd8f8..817faa6 100755..100644
--- a/encoder/armv8/ih264e_half_pel_av8.s
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -1015,10 +1015,3 @@ filter_2dvh_skip_row:
///*****************************************
-
-
-
-
-
-
- .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ih264e_platform_macros.h b/encoder/armv8/ih264e_platform_macros.h
index 39cac96..39cac96 100755..100644
--- a/encoder/armv8/ih264e_platform_macros.h
+++ b/encoder/armv8/ih264e_platform_macros.h
diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s
index 99ebc8a..47c3425 100755..100644
--- a/encoder/armv8/ime_distortion_metrics_av8.s
+++ b/encoder/armv8/ime_distortion_metrics_av8.s
@@ -975,4 +975,3 @@ satdq_end_func:
ldp d8, d9, [sp], #16
pop_v_regs
ret
- .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ime_platform_macros.h b/encoder/armv8/ime_platform_macros.h
index 0f5b2f2..0f5b2f2 100755..100644
--- a/encoder/armv8/ime_platform_macros.h
+++ b/encoder/armv8/ime_platform_macros.h
diff --git a/encoder/ih264e.h b/encoder/ih264e.h
index 15a9d8f..15a9d8f 100755..100644
--- a/encoder/ih264e.h
+++ b/encoder/ih264e.h
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
index e5c66ea..e5c66ea 100755..100644
--- a/encoder/ih264e_api.c
+++ b/encoder/ih264e_api.c
diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c
index e5bfbe4..e5bfbe4 100755..100644
--- a/encoder/ih264e_bitstream.c
+++ b/encoder/ih264e_bitstream.c
diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h
index 21360cc..21360cc 100755..100644
--- a/encoder/ih264e_bitstream.h
+++ b/encoder/ih264e_bitstream.h
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
index 1f98b6a..1f98b6a 100755..100644
--- a/encoder/ih264e_cavlc.c
+++ b/encoder/ih264e_cavlc.c
diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h
index acd0def..acd0def 100755..100644
--- a/encoder/ih264e_cavlc.h
+++ b/encoder/ih264e_cavlc.h
diff --git a/encoder/ih264e_config.h b/encoder/ih264e_config.h
index 2446cdb..2446cdb 100755..100644
--- a/encoder/ih264e_config.h
+++ b/encoder/ih264e_config.h
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c
index 89243a5..89243a5 100755..100644
--- a/encoder/ih264e_core_coding.c
+++ b/encoder/ih264e_core_coding.c
diff --git a/encoder/ih264e_core_coding.h b/encoder/ih264e_core_coding.h
index 1237d25..1237d25 100755..100644
--- a/encoder/ih264e_core_coding.h
+++ b/encoder/ih264e_core_coding.h
diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c
index 8a11bdb..8a11bdb 100755..100644
--- a/encoder/ih264e_deblk.c
+++ b/encoder/ih264e_deblk.c
diff --git a/encoder/ih264e_deblk.h b/encoder/ih264e_deblk.h
index 9b3b67b..9b3b67b 100755..100644
--- a/encoder/ih264e_deblk.h
+++ b/encoder/ih264e_deblk.h
diff --git a/encoder/ih264e_debug.h b/encoder/ih264e_debug.h
index 5cb0434..5cb0434 100755..100644
--- a/encoder/ih264e_debug.h
+++ b/encoder/ih264e_debug.h
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
index 76929ef..76929ef 100755..100644
--- a/encoder/ih264e_defs.h
+++ b/encoder/ih264e_defs.h
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
index ffc6fb7..ffc6fb7 100755..100644
--- a/encoder/ih264e_encode.c
+++ b/encoder/ih264e_encode.c
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
index 67e5409..67e5409 100755..100644
--- a/encoder/ih264e_encode_header.c
+++ b/encoder/ih264e_encode_header.c
diff --git a/encoder/ih264e_encode_header.h b/encoder/ih264e_encode_header.h
index acae5b6..acae5b6 100755..100644
--- a/encoder/ih264e_encode_header.h
+++ b/encoder/ih264e_encode_header.h
diff --git a/encoder/ih264e_error.h b/encoder/ih264e_error.h
index 8fe9dac..8fe9dac 100755..100644
--- a/encoder/ih264e_error.h
+++ b/encoder/ih264e_error.h
diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c
index 393d6ca..393d6ca 100755..100644
--- a/encoder/ih264e_fmt_conv.c
+++ b/encoder/ih264e_fmt_conv.c
diff --git a/encoder/ih264e_fmt_conv.h b/encoder/ih264e_fmt_conv.h
index 6b33bf0..6b33bf0 100755..100644
--- a/encoder/ih264e_fmt_conv.h
+++ b/encoder/ih264e_fmt_conv.h
diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c
index 65f943a..65f943a 100755..100644
--- a/encoder/ih264e_function_selector_generic.c
+++ b/encoder/ih264e_function_selector_generic.c
diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c
index e2b46a4..e2b46a4 100755..100644
--- a/encoder/ih264e_globals.c
+++ b/encoder/ih264e_globals.c
diff --git a/encoder/ih264e_globals.h b/encoder/ih264e_globals.h
index 4c3de23..4c3de23 100755..100644
--- a/encoder/ih264e_globals.h
+++ b/encoder/ih264e_globals.h
diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c
index cb475a1..cb475a1 100755..100644
--- a/encoder/ih264e_half_pel.c
+++ b/encoder/ih264e_half_pel.c
diff --git a/encoder/ih264e_half_pel.h b/encoder/ih264e_half_pel.h
index 92bd37f..92bd37f 100755..100644
--- a/encoder/ih264e_half_pel.h
+++ b/encoder/ih264e_half_pel.h
diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c
index b41d717..b41d717 100755..100644
--- a/encoder/ih264e_intra_modes_eval.c
+++ b/encoder/ih264e_intra_modes_eval.c
diff --git a/encoder/ih264e_intra_modes_eval.h b/encoder/ih264e_intra_modes_eval.h
index c8402e5..c8402e5 100755..100644
--- a/encoder/ih264e_intra_modes_eval.h
+++ b/encoder/ih264e_intra_modes_eval.h
diff --git a/encoder/ih264e_list.h b/encoder/ih264e_list.h
index 782c007..782c007 100755..100644
--- a/encoder/ih264e_list.h
+++ b/encoder/ih264e_list.h
diff --git a/encoder/ih264e_master.h b/encoder/ih264e_master.h
index 6c7505a..6c7505a 100755..100644
--- a/encoder/ih264e_master.h
+++ b/encoder/ih264e_master.h
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
index 2dd0974..2dd0974 100755..100644
--- a/encoder/ih264e_mc.c
+++ b/encoder/ih264e_mc.c
diff --git a/encoder/ih264e_mc.h b/encoder/ih264e_mc.h
index 965e1d1..965e1d1 100755..100644
--- a/encoder/ih264e_mc.h
+++ b/encoder/ih264e_mc.h
diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c
index 9e8d7a3..9e8d7a3 100755..100644
--- a/encoder/ih264e_me.c
+++ b/encoder/ih264e_me.c
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
index c4834a1..c4834a1 100755..100644
--- a/encoder/ih264e_me.h
+++ b/encoder/ih264e_me.h
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
index bc0e873..bc0e873 100755..100644
--- a/encoder/ih264e_modify_frm_rate.c
+++ b/encoder/ih264e_modify_frm_rate.c
diff --git a/encoder/ih264e_modify_frm_rate.h b/encoder/ih264e_modify_frm_rate.h
index c301e2c..c301e2c 100755..100644
--- a/encoder/ih264e_modify_frm_rate.h
+++ b/encoder/ih264e_modify_frm_rate.h
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
index fa67d84..fa67d84 100755..100644
--- a/encoder/ih264e_process.c
+++ b/encoder/ih264e_process.c
diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h
index 9715434..9715434 100755..100644
--- a/encoder/ih264e_process.h
+++ b/encoder/ih264e_process.h
diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c
index 1e2fe4f..1e2fe4f 100755..100644
--- a/encoder/ih264e_rate_control.c
+++ b/encoder/ih264e_rate_control.c
diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h
index de9466a..de9466a 100755..100644
--- a/encoder/ih264e_rate_control.h
+++ b/encoder/ih264e_rate_control.h
diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c
index e4d5781..e4d5781 100755..100644
--- a/encoder/ih264e_rc_mem_interface.c
+++ b/encoder/ih264e_rc_mem_interface.c
diff --git a/encoder/ih264e_rc_mem_interface.h b/encoder/ih264e_rc_mem_interface.h
index a2946a7..a2946a7 100755..100644
--- a/encoder/ih264e_rc_mem_interface.h
+++ b/encoder/ih264e_rc_mem_interface.h
diff --git a/encoder/ih264e_statistics.h b/encoder/ih264e_statistics.h
index 0ab33ca..0ab33ca 100755..100644
--- a/encoder/ih264e_statistics.h
+++ b/encoder/ih264e_statistics.h
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
index 1043a53..1043a53 100755..100644
--- a/encoder/ih264e_structs.h
+++ b/encoder/ih264e_structs.h
diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c
index a6a7f3c..a6a7f3c 100755..100644
--- a/encoder/ih264e_time_stamp.c
+++ b/encoder/ih264e_time_stamp.c
diff --git a/encoder/ih264e_time_stamp.h b/encoder/ih264e_time_stamp.h
index 1ee559d..1ee559d 100755..100644
--- a/encoder/ih264e_time_stamp.h
+++ b/encoder/ih264e_time_stamp.h
diff --git a/encoder/ih264e_trace.h b/encoder/ih264e_trace.h
index 8134524..8134524 100755..100644
--- a/encoder/ih264e_trace.h
+++ b/encoder/ih264e_trace.h
diff --git a/encoder/ih264e_trace_support.h b/encoder/ih264e_trace_support.h
index c35bd4f..c35bd4f 100755..100644
--- a/encoder/ih264e_trace_support.h
+++ b/encoder/ih264e_trace_support.h
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
index f0086cb..f0086cb 100755..100644
--- a/encoder/ih264e_utils.c
+++ b/encoder/ih264e_utils.c
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
index 651dad9..651dad9 100755..100644
--- a/encoder/ih264e_utils.h
+++ b/encoder/ih264e_utils.h
diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c
index 3dcba8d..3dcba8d 100755..100644
--- a/encoder/ih264e_version.c
+++ b/encoder/ih264e_version.c
diff --git a/encoder/ih264e_version.h b/encoder/ih264e_version.h
index 303a1e2..303a1e2 100755..100644
--- a/encoder/ih264e_version.h
+++ b/encoder/ih264e_version.h
diff --git a/encoder/ime.c b/encoder/ime.c
index c89aaab..c89aaab 100755..100644
--- a/encoder/ime.c
+++ b/encoder/ime.c
diff --git a/encoder/ime.h b/encoder/ime.h
index 5c039e8..5c039e8 100755..100644
--- a/encoder/ime.h
+++ b/encoder/ime.h
diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h
index 14d9c55..14d9c55 100755..100644
--- a/encoder/ime_defs.h
+++ b/encoder/ime_defs.h
diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c
index 23a1fbc..23a1fbc 100755..100644
--- a/encoder/ime_distortion_metrics.c
+++ b/encoder/ime_distortion_metrics.c
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
index a30e1fc..a30e1fc 100755..100644
--- a/encoder/ime_distortion_metrics.h
+++ b/encoder/ime_distortion_metrics.h
diff --git a/encoder/ime_macros.h b/encoder/ime_macros.h
index a7b8c65..a7b8c65 100755..100644
--- a/encoder/ime_macros.h
+++ b/encoder/ime_macros.h
diff --git a/encoder/ime_statistics.h b/encoder/ime_statistics.h
index eeacaf2..eeacaf2 100755..100644
--- a/encoder/ime_statistics.h
+++ b/encoder/ime_statistics.h
diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h
index 7819b91..7819b91 100755..100644
--- a/encoder/ime_structs.h
+++ b/encoder/ime_structs.h
diff --git a/encoder/ime_typedefs.h b/encoder/ime_typedefs.h
index d36632d..d36632d 100755..100644
--- a/encoder/ime_typedefs.h
+++ b/encoder/ime_typedefs.h
diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c
index 1dfd9de..1dfd9de 100755..100644
--- a/encoder/irc_bit_allocation.c
+++ b/encoder/irc_bit_allocation.c
diff --git a/encoder/irc_bit_allocation.h b/encoder/irc_bit_allocation.h
index 19ba0df..19ba0df 100755..100644
--- a/encoder/irc_bit_allocation.h
+++ b/encoder/irc_bit_allocation.h
diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c
index c179a28..c179a28 100755..100644
--- a/encoder/irc_cbr_buffer_control.c
+++ b/encoder/irc_cbr_buffer_control.c
diff --git a/encoder/irc_cbr_buffer_control.h b/encoder/irc_cbr_buffer_control.h
index 2534961..2534961 100755..100644
--- a/encoder/irc_cbr_buffer_control.h
+++ b/encoder/irc_cbr_buffer_control.h
diff --git a/encoder/irc_cntrl_param.h b/encoder/irc_cntrl_param.h
index 82235f7..82235f7 100755..100644
--- a/encoder/irc_cntrl_param.h
+++ b/encoder/irc_cntrl_param.h
diff --git a/encoder/irc_common.h b/encoder/irc_common.h
index c341de4..c341de4 100755..100644
--- a/encoder/irc_common.h
+++ b/encoder/irc_common.h
diff --git a/encoder/irc_datatypes.h b/encoder/irc_datatypes.h
index 8e4685a..8e4685a 100755..100644
--- a/encoder/irc_datatypes.h
+++ b/encoder/irc_datatypes.h
diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c
index 0d8abc2..0d8abc2 100755..100644
--- a/encoder/irc_est_sad.c
+++ b/encoder/irc_est_sad.c
diff --git a/encoder/irc_est_sad.h b/encoder/irc_est_sad.h
index c8238c9..c8238c9 100755..100644
--- a/encoder/irc_est_sad.h
+++ b/encoder/irc_est_sad.h
diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c
index 42dcfc5..42dcfc5 100755..100644
--- a/encoder/irc_fixed_point_error_bits.c
+++ b/encoder/irc_fixed_point_error_bits.c
diff --git a/encoder/irc_fixed_point_error_bits.h b/encoder/irc_fixed_point_error_bits.h
index 4ddf1eb..4ddf1eb 100755..100644
--- a/encoder/irc_fixed_point_error_bits.h
+++ b/encoder/irc_fixed_point_error_bits.h
diff --git a/encoder/irc_frame_info_collector.c b/encoder/irc_frame_info_collector.c
index 65f24c4..65f24c4 100755..100644
--- a/encoder/irc_frame_info_collector.c
+++ b/encoder/irc_frame_info_collector.c
diff --git a/encoder/irc_frame_info_collector.h b/encoder/irc_frame_info_collector.h
index 58dc467..58dc467 100755..100644
--- a/encoder/irc_frame_info_collector.h
+++ b/encoder/irc_frame_info_collector.h
diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c
index 880ee19..880ee19 100755..100644
--- a/encoder/irc_mb_model_based.c
+++ b/encoder/irc_mb_model_based.c
diff --git a/encoder/irc_mb_model_based.h b/encoder/irc_mb_model_based.h
index aad520a..aad520a 100755..100644
--- a/encoder/irc_mb_model_based.h
+++ b/encoder/irc_mb_model_based.h
diff --git a/encoder/irc_mem_req_and_acq.h b/encoder/irc_mem_req_and_acq.h
index a2946a7..a2946a7 100755..100644
--- a/encoder/irc_mem_req_and_acq.h
+++ b/encoder/irc_mem_req_and_acq.h
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
index 186188c..186188c 100755..100644
--- a/encoder/irc_picture_type.c
+++ b/encoder/irc_picture_type.c
diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h
index 1af5424..1af5424 100755..100644
--- a/encoder/irc_picture_type.h
+++ b/encoder/irc_picture_type.h
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
index 6c6586e..6c6586e 100755..100644
--- a/encoder/irc_rate_control_api.c
+++ b/encoder/irc_rate_control_api.c
diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h
index 0173037..0173037 100755..100644
--- a/encoder/irc_rate_control_api.h
+++ b/encoder/irc_rate_control_api.h
diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h
index ba39e7f..ba39e7f 100755..100644
--- a/encoder/irc_rate_control_api_structs.h
+++ b/encoder/irc_rate_control_api_structs.h
diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c
index f5c0737..f5c0737 100755..100644
--- a/encoder/irc_rd_model.c
+++ b/encoder/irc_rd_model.c
diff --git a/encoder/irc_rd_model.h b/encoder/irc_rd_model.h
index 8be31c1..8be31c1 100755..100644
--- a/encoder/irc_rd_model.h
+++ b/encoder/irc_rd_model.h
diff --git a/encoder/irc_rd_model_struct.h b/encoder/irc_rd_model_struct.h
index dc4c0ea..dc4c0ea 100755..100644
--- a/encoder/irc_rd_model_struct.h
+++ b/encoder/irc_rd_model_struct.h
diff --git a/encoder/irc_trace_support.h b/encoder/irc_trace_support.h
index c35bd4f..c35bd4f 100755..100644
--- a/encoder/irc_trace_support.h
+++ b/encoder/irc_trace_support.h
diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c
index 23e9959..23e9959 100755..100644
--- a/encoder/irc_vbr_storage_vbv.c
+++ b/encoder/irc_vbr_storage_vbv.c
diff --git a/encoder/irc_vbr_storage_vbv.h b/encoder/irc_vbr_storage_vbv.h
index c53c66d..c53c66d 100755..100644
--- a/encoder/irc_vbr_storage_vbv.h
+++ b/encoder/irc_vbr_storage_vbv.h
diff --git a/encoder/irc_vbr_str_prms.c b/encoder/irc_vbr_str_prms.c
index 29055c2..29055c2 100755..100644
--- a/encoder/irc_vbr_str_prms.c
+++ b/encoder/irc_vbr_str_prms.c
diff --git a/encoder/irc_vbr_str_prms.h b/encoder/irc_vbr_str_prms.h
index 34301d8..34301d8 100755..100644
--- a/encoder/irc_vbr_str_prms.h
+++ b/encoder/irc_vbr_str_prms.h
diff --git a/encoder/ithread.h b/encoder/ithread.h
index 82170a5..82170a5 100755..100644
--- a/encoder/ithread.h
+++ b/encoder/ithread.h
diff --git a/encoder/iv2.h b/encoder/iv2.h
index 538bb1e..538bb1e 100755..100644
--- a/encoder/iv2.h
+++ b/encoder/iv2.h
diff --git a/encoder/ive2.h b/encoder/ive2.h
index 8cb0fd1..8cb0fd1 100755..100644
--- a/encoder/ive2.h
+++ b/encoder/ive2.h
diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c
index 58ec4d0..58ec4d0 100755..100644
--- a/encoder/mips/ih264e_function_selector.c
+++ b/encoder/mips/ih264e_function_selector.c
diff --git a/encoder/mips/ih264e_platform_macros.h b/encoder/mips/ih264e_platform_macros.h
index ed1edd4..ed1edd4 100755..100644
--- a/encoder/mips/ih264e_platform_macros.h
+++ b/encoder/mips/ih264e_platform_macros.h
diff --git a/encoder/mips/ime_platform_macros.h b/encoder/mips/ime_platform_macros.h
index 18e2e8f..18e2e8f 100755..100644
--- a/encoder/mips/ime_platform_macros.h
+++ b/encoder/mips/ime_platform_macros.h
diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c
index 429cdab..429cdab 100755..100644
--- a/encoder/x86/ih264e_function_selector.c
+++ b/encoder/x86/ih264e_function_selector.c
diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c
index 6fa6308..6fa6308 100755..100644
--- a/encoder/x86/ih264e_function_selector_sse42.c
+++ b/encoder/x86/ih264e_function_selector_sse42.c
diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c
index 7401e53..7401e53 100755..100644
--- a/encoder/x86/ih264e_function_selector_ssse3.c
+++ b/encoder/x86/ih264e_function_selector_ssse3.c
diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c
index 42580fa..42580fa 100755..100644
--- a/encoder/x86/ih264e_half_pel_ssse3.c
+++ b/encoder/x86/ih264e_half_pel_ssse3.c
diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
index 657921f..0f4a9ad 100755..100644
--- a/encoder/x86/ih264e_intra_modes_eval_ssse3.c
+++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
@@ -487,7 +487,7 @@ void ih264e_evaluate_intra_4x4_modes_ssse3(UWORD8 *pu1_src,
INT_MAX, INT_MAX, INT_MAX, INT_MAX };
WORD32 min_cost;
- WORD32 lambda4 = u4_lambda << 2;
+ UWORD32 lambda4 = u4_lambda << 2;
WORD32 dst_strd2, dst_strd3;
__m128i left_top_16x8b, src_16x8b, pred0_16x8b, sad_8x16b;
diff --git a/encoder/x86/ih264e_platform_macros.h b/encoder/x86/ih264e_platform_macros.h
index b4dfadd..b4dfadd 100755..100644
--- a/encoder/x86/ih264e_platform_macros.h
+++ b/encoder/x86/ih264e_platform_macros.h
diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c
index 0876788..baf18a4 100755..100644
--- a/encoder/x86/ime_distortion_metrics_sse42.c
+++ b/encoder/x86/ime_distortion_metrics_sse42.c
@@ -110,6 +110,7 @@ void ime_compute_sad_16x16_sse42(UWORD8 *pu1_src,
__m128i res_r0, res_r1, res_r2, res_r3;
__m128i sad_val;
int val1, val2;
+ UNUSED (i4_max_sad);
// Row 0-3 sad calculation
src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
@@ -248,6 +249,7 @@ void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src,
WORD32 i4_max_sad,
WORD32 *pi4_mb_distortion)
{
+ UNUSED (i4_max_sad);
__m128i src_r0, src_r1, src_r2, src_r3;
__m128i est_r0, est_r1, est_r2, est_r3;
__m128i res_r0, res_r1, res_r2, res_r3;
@@ -498,6 +500,7 @@ void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
WORD32 i4_max_sad,
WORD32 *pi4_mb_distortion)
{
+ UNUSED (i4_max_sad);
__m128i src_r0, src_r1, src_r2, src_r3;
__m128i est_r0, est_r1, est_r2, est_r3;
__m128i res_r0, res_r1, res_r2, res_r3;
diff --git a/encoder/x86/ime_platform_macros.h b/encoder/x86/ime_platform_macros.h
index 18e2e8f..18e2e8f 100755..100644
--- a/encoder/x86/ime_platform_macros.h
+++ b/encoder/x86/ime_platform_macros.h
diff --git a/test/Android.mk b/test/Android.mk
index 0085832..0085832 100755..100644
--- a/test/Android.mk
+++ b/test/Android.mk
diff --git a/test/decoder.mk b/test/decoder.mk
index 1a49a92..1a49a92 100755..100644
--- a/test/decoder.mk
+++ b/test/decoder.mk
diff --git a/test/decoder/main.c b/test/decoder/main.c
index 0076ce9..0076ce9 100755..100644
--- a/test/decoder/main.c
+++ b/test/decoder/main.c
diff --git a/test/encoder.mk b/test/encoder.mk
index 9a0980e..9a0980e 100755..100644
--- a/test/encoder.mk
+++ b/test/encoder.mk
diff --git a/test/encoder/app.h b/test/encoder/app.h
index 7c16fcd..7c16fcd 100755..100644
--- a/test/encoder/app.h
+++ b/test/encoder/app.h
diff --git a/test/encoder/input.c b/test/encoder/input.c
index c292612..c292612 100755..100644
--- a/test/encoder/input.c
+++ b/test/encoder/input.c
diff --git a/test/encoder/main.c b/test/encoder/main.c
index 2a9635d..bb9cabf 100755..100644
--- a/test/encoder/main.c
+++ b/test/encoder/main.c
@@ -29,7 +29,10 @@
#include <assert.h>
#include <string.h>
#include <sys/time.h>
+
+#ifndef IOS
#include <malloc.h>
+#endif
#ifdef WINDOWS_TIMER
#include "windows.h"
@@ -1989,7 +1992,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
/* 20 11 2013 100189 Initial Version */
/*****************************************************************************/
#ifdef IOS
-int h264enc_main(char * homedir)
+int h264enc_main(char * homedir,char *documentdir, int screen_wd, int screen_ht)
#else
int main(int argc, char *argv[])
#endif
@@ -2036,6 +2039,9 @@ int main(int argc, char *argv[])
strcpy(ac_cfg_fname, argv[1]);
}
+#else
+ strcpy(ac_cfg_fname, "test.cfg");
+
#endif
/*************************************************************************/
@@ -2406,22 +2412,22 @@ int main(int argc, char *argv[])
#ifdef IOS
/* Correct file paths */
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_ip_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_ip_fname);
strcpy (s_app_ctxt.ac_ip_fname, filename_with_path);
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_op_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_op_fname);
strcpy (s_app_ctxt.ac_op_fname, filename_with_path);
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_recon_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_recon_fname);
strcpy (s_app_ctxt.ac_recon_fname, filename_with_path);
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_chksum_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_chksum_fname);
strcpy (s_app_ctxt.ac_chksum_fname, filename_with_path);
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_mb_info_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_mb_info_fname);
strcpy (s_app_ctxt.ac_mb_info_fname, filename_with_path);
- sprintf(filename_with_path, "%s/%s", homedir, s_app_ctxt.ac_pic_info_fname);
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctxt.ac_pic_info_fname);
strcpy (s_app_ctxt.ac_pic_info_fname, filename_with_path);
#endif
diff --git a/test/encoder/output.c b/test/encoder/output.c
index e0f27dd..e0f27dd 100755..100644
--- a/test/encoder/output.c
+++ b/test/encoder/output.c
diff --git a/test/encoder/psnr.c b/test/encoder/psnr.c
index c9bb6a1..c9bb6a1 100755..100644
--- a/test/encoder/psnr.c
+++ b/test/encoder/psnr.c
diff --git a/test/encoder/psnr.h b/test/encoder/psnr.h
index fd388cf..fd388cf 100755..100644
--- a/test/encoder/psnr.h
+++ b/test/encoder/psnr.h
diff --git a/test/encoder/recon.c b/test/encoder/recon.c
index 7fd0f5c..7fd0f5c 100755..100644
--- a/test/encoder/recon.c
+++ b/test/encoder/recon.c